diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e533ee6..23b363a 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3453,6 +3453,23 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Exceeding this will trigger a flush irrelevant of memory pressure condition."), HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1, "Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded."), + HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED( + "hive.vectorized.execution.groupby.native.enabled", true, + "This flag should be set to true to enable the native vectorization of queries using GroupBy.\n" + + "The default value is true."), + HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE( + "hive.test.vectorized.execution.groupby.native.override", + "none", new StringSet("none", "enable", "disable"), + "internal use only, used to override the hive.vectorized.execution.groupby.native.enabled\n" + + "setting. 
Using enable will force it on and disable will force it off.\n" + + "The default none is do nothing, of course", + true), + HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE( + "hive.test.vectorized.groupby.native.max.memory.available", -1, + "internal use only, used for creating different vectorized hash table sizes\n" + + "to exercise more logic\n" + + "The default value is -1 which means don't use it", + true), HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED("hive.vectorized.execution.reducesink.new.enabled", true, "This flag should be set to true to enable the new vectorization\n" + "of queries using ReduceSink.\ni" + diff --git data/files/groupby_long_1a.txt data/files/groupby_long_1a.txt new file mode 100644 index 0000000..8cf831f --- /dev/null +++ data/files/groupby_long_1a.txt @@ -0,0 +1,11 @@ +-5310365297525168078 +-6187919478609154811 +968819023021777205 +3313583664488247651 +-5206670856103795573 +\N +-6187919478609154811 +1569543799237464101 +-6187919478609154811 +-8460550397108077433 +-6187919478609154811 diff --git data/files/groupby_long_1a_nonull.txt data/files/groupby_long_1a_nonull.txt new file mode 100644 index 0000000..b2325ad --- /dev/null +++ data/files/groupby_long_1a_nonull.txt @@ -0,0 +1,10 @@ +1569543799237464101 +-6187919478609154811 +968819023021777205 +-8460550397108077433 +-6187919478609154811 +-5310365297525168078 +-6187919478609154811 +-5206670856103795573 +3313583664488247651 +-6187919478609154811 diff --git data/files/groupby_long_1b.txt data/files/groupby_long_1b.txt new file mode 100644 index 0000000..87c2b3c --- /dev/null +++ data/files/groupby_long_1b.txt @@ -0,0 +1,13 @@ +\N +31713 +31713 +31713 +31713 +32030 +31713 +-25394 +31713 +31713 +31713 +31713 +31713 diff --git data/files/groupby_long_1b_nonull.txt data/files/groupby_long_1b_nonull.txt new file mode 100644 index 0000000..0b438a2 --- /dev/null +++ data/files/groupby_long_1b_nonull.txt @@ -0,0 +1,12 @@ +31713 +31713 +31713 +31713 +32030 +31713 +-25394 +31713 +31713 
+31713 +31713 +31713 diff --git data/files/groupby_long_1c.txt data/files/groupby_long_1c.txt new file mode 100644 index 0000000..2d13c26 --- /dev/null +++ data/files/groupby_long_1c.txt @@ -0,0 +1,11 @@ +1928928239,\N +-1437463633,YYXPPCH +-1437463633,TKTKGVGFW +1725068083,MKSCCE +1928928239,\N +\N,ABBZ +1928928239,AMKTIWQ +-1437463633,JU +1928928239,VAQHVRI +-1437463633,SOWDWMS +-1437463633,\N diff --git data/files/groupby_long_1c_nonull.txt data/files/groupby_long_1c_nonull.txt new file mode 100644 index 0000000..f6bc6e8 --- /dev/null +++ data/files/groupby_long_1c_nonull.txt @@ -0,0 +1,10 @@ +1928928239,\N +-1437463633,YYXPPCH +-1437463633,TKTKGVGFW +1725068083,MKSCCE +1928928239,\N +1928928239,AMKTIWQ +-1437463633,JU +1928928239,VAQHVRI +-1437463633,SOWDWMS +-1437463633,\N diff --git data/files/groupby_serialize_1a.txt data/files/groupby_serialize_1a.txt new file mode 100644 index 0000000..cae1ecc --- /dev/null +++ data/files/groupby_serialize_1a.txt @@ -0,0 +1,17 @@ +2061-12-19 22:10:32.000628309 +\N +2686-05-23 07:46:46.565832918 +2082-07-14 04:00:40.695380469 +2188-06-04 15:03:14.963259704 +2608-02-23 23:44:02.546440891 +2093-04-10 23:36:54.846 +2898-10-01 22:27:02.000871113 +2306-06-21 11:02:00.143124239 +\N +\N +2306-06-21 11:02:00.143124239 +2093-04-10 23:36:54.846 +\N +2686-05-23 07:46:46.565832918 +2093-04-10 23:36:54.846 +2299-11-15 16:41:30.401 diff --git data/files/groupby_serialize_1a_nonull.txt data/files/groupby_serialize_1a_nonull.txt new file mode 100644 index 0000000..0520a9a --- /dev/null +++ data/files/groupby_serialize_1a_nonull.txt @@ -0,0 +1,13 @@ +2061-12-19 22:10:32.000628309 +2686-05-23 07:46:46.565832918 +2082-07-14 04:00:40.695380469 +2188-06-04 15:03:14.963259704 +2608-02-23 23:44:02.546440891 +2093-04-10 23:36:54.846 +2898-10-01 22:27:02.000871113 +2306-06-21 11:02:00.143124239 +2306-06-21 11:02:00.143124239 +2093-04-10 23:36:54.846 +2686-05-23 07:46:46.565832918 +2093-04-10 23:36:54.846 +2299-11-15 16:41:30.401 diff --git 
data/files/groupby_serialize_1b.txt data/files/groupby_serialize_1b.txt new file mode 100644 index 0000000..c47bae0 --- /dev/null +++ data/files/groupby_serialize_1b.txt @@ -0,0 +1,47 @@ +2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +\N,\N,\N,-2207.3 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 
09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 +2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +\N,\N,\N,-2.4 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 \ No newline at end of file diff --git data/files/groupby_serialize_1b_nonull.txt data/files/groupby_serialize_1b_nonull.txt new file mode 100644 index 0000000..e640b42 --- /dev/null +++ data/files/groupby_serialize_1b_nonull.txt @@ -0,0 +1,66 @@ +2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 
06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 +2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 +2512-10-06 03:03:03,1560,X,-922.6951584107 +2396-04-06 15:39:02.404013577,29661,ZSMB,0.76718326 +2409-09-23 10:33:27,2638,XSXR,0.4 +2969-01-23 14:08:04.000667259,6689,TFGVOGPJF,-0.01 +2333-07-28 09:59:26,23196,RKSK,37872288434740893.5 +2409-09-23 10:33:27,2638,XSXR,-162.95 +2357-05-08 07:09:09.000482799,6226,ZSMB,-472 +2304-12-15 15:31:16,15090,G,-4319470286240016.3 +2304-12-15 15:31:16,1301,T,61.302 +2105-01-04 16:27:45,23100,ZSMB,-83.2328 +2242-08-04 07:51:46.905,20223,UCYXACQ,-0.26149 +2637-03-12 22:25:46.385,-17786,HYEGQ,-84.169614329419 +1931-12-04 11:13:47.269597392,23196,HVJCQMTQL,-9697532.8994 +2897-08-10 15:21:47.09,23663,XYUVBED,6370 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,57.62175257788037 +2145-10-15 06:58:42.831,2638,UANGISEXR,-5996.306 +2462-12-16 23:11:32.633305644,-26373,CB,67.41799 +2396-04-06 15:39:02.404013577,29661,ZSMB,-5151598.347 +2304-12-15 15:31:16,15090,G,975 +2512-10-06 03:03:03,32099,ARNZ,-0.41 +2188-06-04 15:03:14.963259704,9468,AAA,2.75496352 \ No newline at end of file diff --git data/files/groupby_string_1a.txt 
data/files/groupby_string_1a.txt new file mode 100644 index 0000000..1cbcd05 --- /dev/null +++ data/files/groupby_string_1a.txt @@ -0,0 +1,13 @@ +FTWURVH +QNCYBDW +UA +WXHJ +\N +WXHJ +PXLD +WXHJ +PXLD +WXHJ +WXHJ +MXGDMBD +PXLD diff --git data/files/groupby_string_1a_nonull.txt data/files/groupby_string_1a_nonull.txt new file mode 100644 index 0000000..a6566f2 --- /dev/null +++ data/files/groupby_string_1a_nonull.txt @@ -0,0 +1,12 @@ +WXHJ +WXHJ +FTWURVH +MXGDMBD +UA +WXHJ +QNCYBDW +PXLD +PXLD +WXHJ +PXLD +WXHJ diff --git data/files/groupby_string_1c.txt data/files/groupby_string_1c.txt new file mode 100644 index 0000000..f223da0 --- /dev/null +++ data/files/groupby_string_1c.txt @@ -0,0 +1,38 @@ +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +,1985-01-22,2111-01-10 15:44:28 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +\N,1865-11-08,2893-04-07 07:36:12 +BEP,2206-08-10,2331-10-09 10:59:51 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +IWEZJHKE,\N,\N +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +KL,1980-09-22,2073-08-25 11:51:10.318 +\N,1915-02-22,2554-10-27 09:34:30 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +GOYJHW,1959-04-27,\N +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +LOTLS,2126-09-16,1977-12-15 15:28:56 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 
+,2021-02-21,2802-04-21 18:48:18.5933838 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +SDA,2196-04-12,2462-10-26 19:28:12.733 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +FYW,1807-03-20,2305-08-17 01:32:44 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 diff --git data/files/groupby_string_1c_nonull.txt data/files/groupby_string_1c_nonull.txt new file mode 100644 index 0000000..6b97ef4 --- /dev/null +++ data/files/groupby_string_1c_nonull.txt @@ -0,0 +1,35 @@ +LOTLS,2126-09-16,1977-12-15 15:28:56 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +FYW,1807-03-20,2305-08-17 01:32:44 +,2021-02-21,2802-04-21 18:48:18.5933838 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +BEP,2206-08-10,2331-10-09 10:59:51 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +,1985-01-22,2111-01-10 15:44:28 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +GOYJHW,1959-04-27,\N +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +KL,1980-09-22,2073-08-25 11:51:10.318 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +IWEZJHKE,\N,\N +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +IOQIDQBHU,2198-02-08,2073-03-21 
15:32:57.617920888 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 diff --git ql/pom.xml ql/pom.xml index 165610f..ed0dc8a 100644 --- ql/pom.xml +++ ql/pom.xml @@ -835,6 +835,7 @@ classpath="${compile.classpath}"/> + diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt new file mode 100644 index 0000000..e5ff533 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt @@ -0,0 +1,311 @@ +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific ColumnVector import code lines. +#COMMENT +#BEGIN_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +#IF STRING_KEY +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +#ENDIF STRING_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific Operator import code lines. 
+#COMMENT +#BEGIN_LINES SINGLE_KEY_VARIATION_OPERATOR_IMPORTS +#IF STRING_KEY +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific transient variables. +#COMMENT +#BEGIN_LINES SINGLE_KEY_VARIATION_TRANSIENT +#IF SERIALIZE_KEY + // Object that can take the single column in row in a vectorized row batch and serialized it. + // The key is not NULL. + private transient VectorSerializeRow keyVectorSerializeWrite; + + // The BinarySortable serialization of the current key. + private transient Output currentKeyOutput; + + // The BinarySortable serialization of the next key for a possible series of equal keys. + private transient Output nextKeyOutput; + +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific initializeOp code lines. 
+#COMMENT +#BEGIN_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP +#IF SERIALIZE_KEY + + keyVectorSerializeWrite = + new VectorSerializeRow( + new BinarySortableSerializeWrite(1)); + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + int[] columnMap = new int[] { groupByKeyExpressions[0].getOutputColumnNum() }; + keyVectorSerializeWrite.init(typeInfos, columnMap); + + currentKeyOutput = new Output(); + nextKeyOutput = new Output(); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Helpful variables for accessing the key values for the LONG and STRING variations. +#COMMENT (None needed for SERIALIZE) +#COMMENT +#BEGIN_LINES KEY_VECTOR_VARIABLES +#IF LONG_KEY + long[] keyVector = keyColVector.vector; +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[][] keyVector = keyColVector.vector; + final int[] keyStart = keyColVector.start; + final int[] keyLength = keyColVector.length; +#ENDIF STRING_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for logical (i.e. selectedInUse) and the batch's keys have no +#COMMENT NULLs case. All variations. 
+#COMMENT +#BEGIN_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES + final int firstBatchIndex = selected[0]; +#IF LONG_KEY + long currentKey = keyVector[firstBatchIndex]; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey = keyVector[firstBatchIndex]; + int currentKeyStart = keyStart[firstBatchIndex]; + int currentKeyLength = keyLength[firstBatchIndex]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex); + byte[] currentKey = currentKeyOutput.getData(); + int currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for logical (i.e. selectedInUse) and the batch's keys may have +#COMMENT NULLs case. All variations. +#COMMENT +#BEGIN_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES + boolean[] keyIsNull = keyColVector.isNull; + boolean currKeyIsNull; + +#IF LONG_KEY + long currentKey; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey; + int currentKeyStart; + int currentKeyLength; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + byte[] currentKey; + int currentKeyLength; +#ENDIF SERIALIZE_KEY + final int firstBatchIndex = selected[0]; + if (keyIsNull[firstBatchIndex]) { + currKeyIsNull = true; +#IF LONG_KEY + currentKey = 0; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = null; + currentKeyStart = 0; + currentKeyLength = 0; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + currentKey = null; + currentKeyLength = 0; +#ENDIF SERIALIZE_KEY + } else { + currKeyIsNull = false; +#IF LONG_KEY + currentKey = keyVector[firstBatchIndex]; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = keyVector[firstBatchIndex]; + currentKeyStart = keyStart[firstBatchIndex]; + currentKeyLength = keyLength[firstBatchIndex]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + 
keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex); + currentKey = currentKeyOutput.getData(); + currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for physical (i.e. NOT selectedInUse) and the batch's keys have no +#COMMENT NULLs case. All variations. +#COMMENT +#BEGIN_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#IF LONG_KEY + long currentKey = keyVector[0]; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey = keyVector[0]; + int currentKeyStart = keyStart[0]; + int currentKeyLength = keyLength[0]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] currentKey = currentKeyOutput.getData(); + int currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for physical (i.e. NOT selectedInUse) and the batch's keys may have +#COMMENT NULLs case. All variations. 
+#COMMENT +#BEGIN_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES + boolean[] keyIsNull = keyColVector.isNull; + boolean currKeyIsNull; + +#IF LONG_KEY + long currentKey; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey; + int currentKeyStart; + int currentKeyLength; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + byte[] currentKey; + int currentKeyLength; +#ENDIF SERIALIZE_KEY + if (keyIsNull[0]) { + currKeyIsNull = true; +#IF LONG_KEY + currentKey = 0; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = null; + currentKeyStart = 0; + currentKeyLength = 0; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + currentKey = null; + currentKeyLength = 0; +#ENDIF SERIALIZE_KEY + } else { + currKeyIsNull = false; +#IF LONG_KEY + currentKey = keyVector[0]; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = keyVector[0]; + currentKeyStart = keyStart[0]; + currentKeyLength = keyLength[0]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + currentKey = currentKeyOutput.getData(); + currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Get next key value at batchIndex. +#COMMENT All variations. 
+#COMMENT +#BEGIN_LINES GET_NEXT_KEY +#IF LONG_KEY + final long nextKey = keyVector[batchIndex]; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] nextKey = keyVector[batchIndex]; + final int nextKeyStart = keyStart[batchIndex]; + final int nextKeyLength = keyLength[batchIndex]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(nextKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + final byte[] nextKey = nextKeyOutput.getData(); + final int nextKeyLength = nextKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT IF statement next key value equals current key value for all variations. +#COMMENT +#BEGIN_LINES IF_NEXT_EQUALS_CURRENT +#IF LONG_KEY + if (currentKey == nextKey) { +#ENDIF LONG_KEY +#IF STRING_KEY + if (StringExpr.equal( + currentKey, currentKeyStart, currentKeyLength, + nextKey, nextKeyStart, nextKeyLength)) { +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + if (StringExpr.equal( + currentKey, 0, currentKeyLength, + nextKey, 0, nextKeyLength)) { +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT ELSE IF statement next key value equals current key value for all variations. +#COMMENT +#BEGIN_LINES ELSE_IF_NEXT_EQUALS_CURRENT +#IF LONG_KEY + } else if (currentKey == nextKey) { +#ENDIF LONG_KEY +#IF STRING_KEY + } else if (StringExpr.equal( + currentKey, currentKeyStart, currentKeyLength, + nextKey, nextKeyStart, nextKeyLength)) { +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + } else if (StringExpr.equal( + currentKey, 0, currentKeyLength, + nextKey, 0, nextKeyLength)) { +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Remember the next key value as the current key value. All variations. 
+#COMMENT +#BEGIN_LINES NEW_CURRENT_KEY + currentKey = nextKey; +#IF STRING_KEY + currentKeyStart = nextKeyStart; + currentKeyLength = nextKeyLength; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + currentKeyLength = nextKeyLength; + final Output tempOutput = nextKeyOutput; + nextKeyOutput = currentKeyOutput; + currentKeyOutput = tempOutput; +#ENDIF SERIALIZE_KEY +#END_LINES \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt new file mode 100644 index 0000000..b64e50b --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt @@ -0,0 +1,438 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES SINGLE_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a Native Vectorized GroupBy with no aggregation. + * + * It is used on a single key for duplicate key reduction. + * + * Final duplicate elimination must be done in reduce-shuffle and a reducer since with hash table + * overflow some duplicates can slip through. And, of course, other vertices may contribute + * the same keys. + */ +public class + extends VectorGroupByHashKeyDuplicateReductionOperatorBase { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + +#USE_LINES SINGLE_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashSingleKeyCommonLines +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- find or create the hash table entry. +#COMMENT All variations. 
+#COMMENT +#BEGIN_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED +#IF LONG_KEY + if (currentKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch of no NULL keys, find or create the hash table +#COMMENT entry. All variations. +#COMMENT +#BEGIN_LINES LAST_NO_NULLS_DUPLICATE_REDUCTION_KEY +#IF LONG_KEY + if (currentKey == 0) { + + // We don't store 0 in the slot table so it can be used to indicate an empty slot. + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch which may have NULL keys, find or create the +#COMMENT hash table entry. All variations. 
+#COMMENT +#BEGIN_LINES LAST_NULLS_DUPLICATE_REDUCTION_KEY + if (!currKeyIsNull) { +#IF LONG_KEY + if (currentKey == 0) { + + // We don't store 0 in the slot table so it can be used to indicate an empty slot. + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SERIALIZE_KEY + } +#END_LINES + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For the all NULL or all 0 keys case we note NULL/0 key exists. Otherwise, we do the + * find/create. + */ + protected void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + if (repeatingKey == 0) { + + // We don't store 0 in the slot table so it can be used to indicate an empty slot. 
+ haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesDuplicateReductionKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength)); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesDuplicateReductionKey( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength)); +#ENDIF SERIALIZE_KEY + } else { + + // We note we encountered a repeating NULL key. + haveNullKey = true; + } + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * Do find/create on each key. + */ + protected void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current key ended. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_DUPLICATE_REDUCTION_KEY + } + + /* + * Logical batch processing (i.e. 
selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * For all NULL keys cases we note NULL key exists since we don't represent it in the slot table. + * + * Do find/create on each non-NULL key. + */ + protected void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES + + if (currKeyIsNull) { + haveNullKey = true; + } + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // NULL key series. + } else { + + // Current non-NULL key ended by NULL key. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + + // We note we encountered a NULL key. + haveNullKey = true; + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current non-NULL key ended by another non-NULL key. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_DUPLICATE_REDUCTION_KEY + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NO NULLS key case. + * + * (For remaining comments for handleLogicalNoNullsKey). 
+ */ + protected void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current key ended. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_DUPLICATE_REDUCTION_KEY + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NULLS key case. + * + * (For remaining comments for handleLogicalNullsKey). + * + */ + protected void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES + + if (currKeyIsNull) { + haveNullKey = true; + } + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // NULL key series. + } else { + + // Current non-NULL key ended by NULL key. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + + // We note we encountered a NULL key. + haveNullKey = true; + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended by non-NULL key. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current non-NULL key ended by non-NULL key. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + } + } + } + // Handle last key. 
+#USE_LINES LAST_NULLS_DUPLICATE_REDUCTION_KEY + } + + @Override + protected void outputSingleKeys( + ColumnVector keyColumnVector) throws HiveException { + +#IF LONG_KEY + doOutputLongKeys((LongColumnVector) keyColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeys((BytesColumnVector) keyColumnVector); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + doOutputSerializeKeys(keyColumnVector); +#ENDIF SERIALIZE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyOperatorBase.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyOperatorBase.txt new file mode 100644 index 0000000..354932e --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyOperatorBase.txt @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key..VectorGroupByHashKeyTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS + +/* + * An single key map optimized for Native Vectorized GroupBy. + */ +public abstract class + extends VectorGroupByHashKeyTable { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. 
+#COMMENT +#INCLUDE GroupByHashSingleKeyCommonLines + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. 
+ + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + protected abstract void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException; + + protected abstract void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException; + + protected abstract void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException; + + protected abstract void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException; + + protected abstract void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException; +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountColumnOperator.txt new file mode 100644 index 0000000..e78dfaa --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountColumnOperator.txt @@ -0,0 +1,1208 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES SINGLE_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a COUNT(non-key-column) Native Vectorized GroupBy. That is, + * the grouping is being done on a single long key and the counting + * is for a another ("non-key") column (which can be any data type). + * + * We make a single pass. We loop over key column and process the keys. We look for + * sequences of NULL keys or equal keys. And, at the same time do any processing for the + * non-key-column counting. + * + * NOTE: Both NULL and non-NULL keys have counts for non-key-columns. So, after counting the + * non-NULL fields for the non-key-column, we always do a hash table find/create even when the count + * is 0 since the all those keys must be part of the output result. + + // A key will get created even when there are no non-NULL column values. Count includes 0. 
+ + findOrCreateLongZeroCountKey( + key, + longKeySeries.currentHashCode, + nonNullCount); + + */ +public class + extends VectorGroupByHashKeySingleCountOperatorBase { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + protected int countColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + +#USE_LINES SINGLE_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + countColumnNum = singleCountAggregation.getCountColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashSingleKeyCommonLines +#COMMENT +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- find or create the hash table entry and +#COMMENT add or initialize it with the count. All variations. 
+#COMMENT +#BEGIN_LINES CURRENT_COLUMN_COUNT_KEY_ENDED +#IF LONG_KEY + findOrCreateLongZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT ignore if it already present since the count is 0 in this case. All variations. +#COMMENT +#BEGIN_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT +#IF LONG_KEY + findOrCreateLongZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + 0); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + 0); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + 0); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch of no NULL keys, find or create the hash table +#COMMENT entry and add or initialize it with the count. All variations. 
+#COMMENT +#BEGIN_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY +#IF LONG_KEY + findOrCreateLongZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch which may have NULL keys, find or create the +#COMMENT hash table entry and add or initialize it with the count. All variations. +#COMMENT +#BEGIN_LINES LAST_NULLS_COLUMN_COUNT_KEY + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#IF LONG_KEY + findOrCreateLongZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch which may have NULL keys, create the hash table +#COMMENT entry if necessary; ignore if it already present since the count is 0 in this case. +#COMMENT All variations. 
+#COMMENT +#BEGIN_LINES LAST_NO_NULLS_KEY_COLUMN_COUNT +#IF LONG_KEY + findOrCreateLongZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + 0); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + 0); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + 0); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch of no NULL keys, create the hash table entry +#COMMENT if necessary; ignore if it already present since the count is 0 in this case. +#COMMENT All variations. +#COMMENT +#BEGIN_LINES LAST_NULLS_KEY_COLUMN_COUNT + if (currKeyIsNull) { + haveNullKey = true; + } else { +#IF LONG_KEY + findOrCreateLongZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + 0); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + 0); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + 0); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT=========================================================================================== +#COMMENT + /* + * Repeating key case -- it is either ALL NULL keys or ALL same non-NULL keys. + * + * First, we determine the number of non-NULL values in the non-key column. + * Then, whether ALL NULL keys or ALL same non-NULL keys, we create the key if necessary and + * include the new count. + * + * A NULL key is not in the slot table. 
It is separately represented by members haveNullKey + * and nullKeyCount. + * + */ + @Override + protected void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) + throws HiveException, IOException { + + /* + * First, determine the count of the non-key column for the whole batch which is covered by the + * repeating key. + */ + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + int nonKeyNonNullCount; + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: [REPEATING,] NO NULLS + nonKeyNonNullCount = inputLogicalSize; + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible + nonKeyNonNullCount = (nonKeyColVector.isNull[0] ? 0 : inputLogicalSize); + + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + nonKeyNonNullCount = 0; + if (batch.selectedInUse) { + + int[] selected = batch.selected; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + if (nonKeyIsNull[batchIndex]) { + nonKeyNonNullCount++; + } + } + } else { + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + if (nonKeyIsNull[batchIndex]) { + nonKeyNonNullCount++; + } + } + } + } + + /* + * Finally, use the non-key non-NULL count for our repeated non-NULL or NULL keys. + */ + if (keyColVector.noNulls || !keyColVector.isNull[0]) { + + // Non-NULL key. 
+#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongZeroCountKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + nonKeyNonNullCount); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + nonKeyNonNullCount); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKey( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + nonKeyNonNullCount); +#ENDIF SERIALIZE_KEY + } else { + + // All NULL keys. Since we are counting a non-Key column, we must count it under the NULL + // pseudo-entry. + haveNullKey = true; + nullKeyCount += nonKeyNonNullCount; + + } + } + + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleLogicalNoNullsKey. + * + * Look for sequences of equal keys and determine their count. + */ + private void doLogicalNoNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. 
+#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY + } + + /* + * Do the non-key-column REPEATING NULLS case for handleLogicalNoNullsKey. + * + * Scan for sequences of equal keys. The column count is simply 0 because of all NULL values -- + * but we still must create an entry in the slot table. + */ + private void doLogicalNoNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + // No counting. + } else { + + // Current key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New current key. +#USE_LINES NEW_CURRENT_KEY + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_KEY_COLUMN_COUNT + } + + /* + * Do the NO REPEATING NULLS case for handleLogicalNoNullsKey. + * + * Look for sequence of equal keys -- look over at the non-key-column and count non-null rows. + * Even when the non-NULL row count is 0, we still must create an entry in the slot table. + */ + private void doLogicalNoNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + int count = (nonKeyIsNull[firstBatchIndex] ? 
0 : 1); + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * In general, loop over key column and process the keys. Look for sequences of equal keys. And, + * at the same time do any processing for the non-key-column counting. + * + * Here are the cases: + * + * 1) When non-key-column {REPEATING|NO REPEATING} NO NULLS, look for sequences of equal keys + * and determine their count. + * + * 2) When non-key-column REPEATING NULLS, scan for sequences of equal keys. The column count + * is simply 0 because of all NULL values -- but we still must create an entry in the + * slot table. + * + * 3) Otherwise, non-key-column NO REPEATING NULLS, as we are looking for sequence of + * equal keys -- look over at the non-key-column and count non-null rows. Even when the + * non-null row count is 0, we still must create an entry in the slot table. + * + */ + @Override + protected void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doLogicalNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. 
+ + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doLogicalNoNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // REPEATING NO NULLS + doLogicalNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS. + + doLogicalNoNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector); + + } + } + + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleLogicalNullsKey. + * + * (For remaining comments see doLogicalNoNullsKeyNoNullsColumn). + */ + private void doLogicalNullsKeyNoNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY + } + + /* + * Do the non-key-column REPEATING NULLS case for handleLogicalNullsKey. + * + * (For remaining comments see doLogicalNoNullsKeyRepeatingNullColumn). 
+ */ + private void doLogicalNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // No counting. + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New NULL key. + currKeyIsNull = true; + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + // No counting + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_KEY_COLUMN_COUNT + } + } + + /* + * Do the non-key-column NO REPEATING NULLS case for handleLogicalNullsKey. + * + * (For remaining comments see doLogicalNoNullsKeyNullsColumn). + */ + private void doLogicalNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES + + int count = (nonKeyIsNull[firstBatchIndex] ? 
0 : 1); + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COLUMN_COUNT_KEY + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * Both NULL and non-NULL keys will have counts for non-key-columns. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any processing for the non-key-column counting. + * + * (See the non-key column case comments for handleLogicalNoNullsKey). + * + * In all cases above, when its a NULL key, do NULL entry processing. + * + */ + @Override + protected void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. 
+ // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doLogicalNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doLogicalNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // Non-NULL repeating non-key column. + doLogicalNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + + doLogicalNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector); + + } + } + + //=============================================================================================== + //=============================================================================================== + + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handlePhysicalNoNullsKey. + * + * (For remaining comments see doLogicalNoNullsKeyNoNullsColumn). + */ + private void doPhysicalNoNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY + } + + /* + * Do the non-key-column REPEATING NULLS case for handleLogicalNoNullsKey. + * + * (For remaining comments see doLogicalNoNullsKeyRepeatingNullColumn). 
+ */ + private void doPhysicalNoNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + // No counting. + } else { + + // Current key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New current key. +#USE_LINES NEW_CURRENT_KEY + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_KEY_COLUMN_COUNT + } + + /* + * Do the NO REPEATING NULLS case for handleLogicalNoNullsKey. + * + * (For remaining comments see doLogicalNoNullsKeyNullsColumn). + */ + private void doPhysicalNoNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + int count = (nonKeyIsNull[0] ? 0 : 1); + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NO NULLS key case. + * + * (For remaining comments see handleLogicalNoNullsKey). 
+ */ + @Override + protected void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doPhysicalNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doPhysicalNoNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // REPEATING NO NULLS + doPhysicalNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS. + + doPhysicalNoNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector); + + } + } + + private void doPhysicalNullsKeyNoNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New non-NULL key. 
+#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY + } + + private void doPhysicalNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // No counting. + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New NULL key. + currKeyIsNull = true; + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + // No counting + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_KEY_COLUMN_COUNT + } + } + + private void doPhysicalNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES + + int count = (nonKeyIsNull[0] ? 0 : 1); + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New NULL key. 
+ currKeyIsNull = true; + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COLUMN_COUNT_KEY + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NULLS key case. + * + * (For remaining comments for handleLogicalNullsKey). + */ + @Override + protected void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doPhysicalNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doPhysicalNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // Non-NULL repeating non-key column. + doPhysicalNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. 
+ + doPhysicalNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector); + + } + } + + @Override + protected void outputSingleKeyAndCountPairs( + ColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + +#IF LONG_KEY + outputLongZeroCountKeyAndCountPairs( + (LongColumnVector) keyColumnVector, countColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndCountPairs( + (BytesColumnVector) keyColumnVector, countColumnVector); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + doOutputSerializeKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF SERIALIZE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountKeyOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountKeyOperator.txt new file mode 100644 index 0000000..6a6c893 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountKeyOperator.txt @@ -0,0 +1,476 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES SINGLE_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a COUNT(key-column) Native Vectorized GroupBy. That is, + * the grouping is being done on one long key and we are counting it. + * + * The NULL key is not represented in the hash table. We handle them as a special case. So, + * the find/create call for non-NULL keys looks like this: + + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); + + */ +public class + extends VectorGroupByHashKeySingleCountOperatorBase { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + +#USE_LINES SINGLE_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashSingleKeyCommonLines +#COMMENT +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- find or create the hash table entry and +#COMMENT add or initialize it with the count. All variations. 
+#COMMENT +#BEGIN_LINES CURRENT_COUNT_KEY_ENDED +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch of no NULL keys, find or create the hash table +#COMMENT entry and add or initialize it with the count. All variations. +#COMMENT +#BEGIN_LINES LAST_NO_NULLS_COUNT_KEY +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch which may have NULL keys, find or create the +#COMMENT hash table entry and add or initialize it with the count. All variations. 
+#COMMENT +#BEGIN_LINES LAST_NULLS_COUNT_KEY + if (!currKeyIsNull) { +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY + } +#END_LINES +#COMMENT=========================================================================================== +#COMMENT + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For all NULL keys case we note NULL key exists but leave its count as 0. + */ + @Override + protected void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongNonZeroCountKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + inputLogicalSize); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKey( + repeatingKey, 0, 
repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + inputLogicalSize); +#ENDIF SERIALIZE_KEY + } else { + + // We note we encountered a repeating NULL key. But there will be no count for it -- + // just NULL. + haveNullKey = true; + } + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * Do find/create on each key with count count. + */ + @Override + protected void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COUNT_KEY + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * For all NULL keys cases we note NULL key exists but leave its count as 0. + * + * Do find/create on each non-NULL key with count count. + */ + @Override + protected void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES + + int count; + if (currKeyIsNull) { + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. 
+ haveNullKey = true; + } else { + count = 1; + } + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // We don't count NULLs for NULL key. + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED +2 + + // New NULL key. + currKeyIsNull = true; + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } + + } else { + +#USE_LINES GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. We don't count NULLs for NULL key. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED +2 + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COUNT_KEY + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NO NULLS key case. + * + * (For remaining comments for handleLogicalNoNullsKey). + */ + @Override + protected void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COUNT_KEY + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NULLS key case. 
+ * + * (For remaining comments for handleLogicalNullsKey). + * + */ + @Override + protected void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES + + int count; + if (currKeyIsNull) { + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } else { + count = 1; + } + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // We don't count NULLs for NULL key. + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED +2 + + // New NULL key. + currKeyIsNull = true; + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } + + } else { + +#USE_LINES GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED +2 + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + // Handle last key. 
+#USE_LINES LAST_NULLS_COUNT_KEY + } + + @Override + protected void outputSingleKeyAndCountPairs( + ColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + +#IF LONG_KEY + outputLongNonZeroKeyAndCountPairs( + (LongColumnVector) keyColumnVector, countColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndCountPairs( + (BytesColumnVector) keyColumnVector, countColumnVector); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + doOutputSerializeKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF SERIALIZE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountStarOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountStarOperator.txt new file mode 100644 index 0000000..c6daad8 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountStarOperator.txt @@ -0,0 +1,460 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES SINGLE_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a COUNT(*) Native Vectorized GroupBy that is lookup on a single long + * using a specialized hash map. + * + Count Star + + NULL key has separate counter. + + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); + + */ +public class + extends VectorGroupByHashKeySingleCountOperatorBase { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + +#USE_LINES SINGLE_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashSingleKeyCommonLines +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- find or create the hash table entry and +#COMMENT add or initialize it with the count. All variations. 
+#COMMENT +#BEGIN_LINES CURRENT_COUNT_STAR_ENDED +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch of no NULL keys, find or create the hash table +#COMMENT entry and add or initialize it with the count. All variations. +#COMMENT +#BEGIN_LINES LAST_NO_NULLS_COUNT_STAR +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch which may have NULL keys, find or create the +#COMMENT hash table entry and add or initialize it with the count. All variations. 
+#COMMENT +#BEGIN_LINES LAST_NULLS_COUNT_STAR + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY + } +#END_LINES +#COMMENT=========================================================================================== +#COMMENT + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For all NULL keys case we note NULL key exists AND count it. + */ + @Override + protected void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongNonZeroCountKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + inputLogicalSize); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + 
findOrCreateBytesKey( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + inputLogicalSize); +#ENDIF SERIALIZE_KEY + } else { + + // We note we encountered a repeating NULL key. + haveNullKey = true; + nullKeyCount += inputLogicalSize; + } + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * Do find/create on each key with count count. + */ + @Override + protected void handleLogicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES CURRENT_COUNT_STAR_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } +#USE_LINES LAST_NO_NULLS_COUNT_STAR + } + + /* + * Logical batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * For all NULL keys we note NULL key exists AND count it count. + * + * Do find/create on each non-NULL key with count count. + */ + @Override + protected void handleLogicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + int[] selected = batch.selected; + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. 
+#USE_LINES CURRENT_COUNT_STAR_ENDED +2 + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_STAR_ENDED +2 + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COUNT_STAR + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NO NULLS key case. + * + * (For remaining comments for handleLogicalNoNullsKey). + */ + @Override + protected void handlePhysicalNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES CURRENT_COUNT_STAR_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COUNT_STAR + } + + /* + * Physical batch processing (i.e. selectedInUse is false since NO rows were filtered out) for + * NULLS key case. + * + * (For remaining comments for handleLogicalNullsKey). 
+ * + */ + @Override + protected void handlePhysicalNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + +#USE_LINES KEY_VECTOR_VARIABLES + +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES + + int count = 1; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_STAR_ENDED +2 + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_STAR_ENDED +2 + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + // Handle last key. 
+#USE_LINES LAST_NULLS_COUNT_STAR + } + + @Override + protected void outputSingleKeyAndCountPairs( + ColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + +#IF LONG_KEY + outputLongNonZeroKeyAndCountPairs( + (LongColumnVector) keyColumnVector, countColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndCountPairs( + (BytesColumnVector) keyColumnVector, countColumnVector); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + doOutputSerializeKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF SERIALIZE_KEY + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java new file mode 100644 index 0000000..3c304d4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * This class is the common operator class of Native Vectorized GroupBy that holds the common
+ * initialization logic.
+ */
+public abstract class VectorGroupByCommon
+ extends Operator<GroupByDesc>
+ implements VectorizationContextRegion, VectorizationOperator {
+
+ private static final long serialVersionUID = 1L;
+
+ protected VectorGroupByDesc vectorDesc;
+
+ protected VectorGroupByInfo vectorGroupByInfo;
+
+ protected VectorizationContext vContext;
+
+ // Create a new outgoing vectorization context because column name map will change.
+ protected VectorizationContext vOutContext; + + protected VectorExpression[] groupByKeyExpressions; + + protected VectorAggregationDesc[] vectorAggregationDescs; + + protected AggregationVariation aggregationVariation; + protected SingleCountAggregation singleCountAggregation; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + + // For debug tracing: the name of the map or reduce task. + protected transient String taskName; + + // Debug display. + protected transient long batchCounter; + + public VectorGroupByCommon() { + super(); + } + + public static int INT_PER_LONG_COUNT = Long.SIZE / Integer.SIZE; + + public VectorGroupByCommon(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx); + + GroupByDesc desc = (GroupByDesc) conf; + this.conf = desc; + this.vectorDesc = (VectorGroupByDesc) vectorDesc; + vectorGroupByInfo = this.vectorDesc.getVectorGroupByInfo(); + + this.vContext = vContext; + + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); + + groupByKeyExpressions = this.vectorDesc.getKeyExpressions(); + + vectorAggregationDescs = this.vectorDesc.getVecAggrDescs(); + + aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + singleCountAggregation = vectorGroupByInfo.getSingleCountAggregation(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + // Determine the name of our map or reduce task for debug tracing. + BaseWork work = Utilities.getMapWork(hconf); + if (work == null) { + work = Utilities.getReduceWork(hconf); + } + taskName = work.getName(); + + batchCounter = 0; + } + + /** + * Implements the getName function for the Node Interface. 
+ * + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + public static String getOperatorName() { + return "GBY"; + } + + @Override + public VectorizationContext getOutputVectorizationContext() { + return vOutContext; + } + + @Override + public VectorizationContext getInputVectorizationContext() { + return vContext; + } + + @Override + public VectorDesc getVectorDesc() { + return vectorDesc; + } + + @Override + public OperatorType getType() { + return OperatorType.GROUPBY; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java new file mode 100644 index 0000000..9af3975 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+/**
+ * This class is the common operator class of Native Vectorized GroupBy for output generation:
+ * taking the aggregations and filling up the output batch.
+ */
+public abstract class VectorGroupByCommonOutput
+ extends VectorGroupByCommon {
+
+ private static final long serialVersionUID = 1L;
+
+ // The above members are initialized by the constructor and must not be
+ // transient.
+ //---------------------------------------------------------------------------
+
+ protected transient VectorizedRowBatch outputBatch;
+
+ private transient VectorizedRowBatchCtx vrbCtx;
+
+ private transient TypeInfo[] outputTypes;
+
+ private transient StandardStructObjectInspector standardOutputObjInspector;
+
+ //---------------------------------------------------------------------------
+ // Pass-thru constructors.
+ //
+
+ public VectorGroupByCommonOutput() {
+ super();
+ }
+
+ public VectorGroupByCommonOutput(CompilationOpContext ctx, OperatorDesc conf,
+ VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+ super(ctx, conf, vContext, vectorDesc);
+ }
+
+ @Override
+ protected void initializeOp(Configuration hconf) throws HiveException {
+ super.initializeOp(hconf);
+
+ List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();
+
+ List<String> outputFieldNames = conf.getOutputColumnNames();
+
+ final int keyCount = (groupByKeyExpressions == null ? 0 : groupByKeyExpressions.length);
+ final int aggrCount = (vectorAggregationDescs == null ?
0 : vectorAggregationDescs.length); + outputTypes = new TypeInfo[keyCount + aggrCount]; + int outputTypesIndex = 0; + + for(int i = 0; i < keyCount; ++i) { + TypeInfo outputTypeInfo = groupByKeyExpressions[i].getOutputTypeInfo(); + outputTypes[outputTypesIndex++] = outputTypeInfo; + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + outputTypeInfo); + objectInspectors.add(objInsp); + } + + for(int i = 0; i < aggrCount; ++i) { + TypeInfo outputTypeInfo = vectorAggregationDescs[i].getOutputTypeInfo(); + outputTypes[outputTypesIndex++] = outputTypeInfo; + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo); + objectInspectors.add(objInsp); + } + + standardOutputObjInspector = + ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors); + outputObjInspector = standardOutputObjInspector; + + /** + * Setup the output batch and vectorization context for downstream operators. + */ + vrbCtx = new VectorizedRowBatchCtx(); + vrbCtx.init(standardOutputObjInspector, vOutContext.getScratchColumnTypeNames()); + outputBatch = vrbCtx.createVectorizedRowBatch(); + } + + public void forwardOutputBatch(VectorizedRowBatch outputBatch) throws HiveException { + + forward(outputBatch, null); + + outputBatch.reset(); + } + + /** + * Copy all of the keys and aggregations to the output batch. + */ + protected abstract void outputGroupBy() throws HiveException; + + protected void flushGroupBy() throws HiveException { + outputGroupBy(); + if (outputBatch.size > 0) { + forwardOutputBatch(outputBatch); + } + } + + /** + * On close, make sure a partially filled overflow batch gets forwarded. 
+ */ + @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + if (!aborted) { + flushGroupBy(); + } + LOG.debug("VectorGroupByCommonOutputOperator closeOp " + batchCounter + " batches processed"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java new file mode 100644 index 0000000..d0dc5f5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.VectorGroupByCommonOutput; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is common hash operator class of Native Vectorized GroupBy for hash related + * initialization logic. + */ +public abstract class VectorGroupByHashCommon + extends VectorGroupByCommonOutput { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorGroupByHashCommon.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient long hashGroupByMemoryAvailableByteLength; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByHashCommon() { + super(); + } + + public VectorGroupByHashCommon(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + final float memoryPercentage = conf.getGroupByMemoryUsage(); + final int testMaxMemoryAvailable = vectorGroupByInfo.getTestGroupByMaxMemoryAvailable(); + final long maxMemoryAvailable = + (testMaxMemoryAvailable == -1 ? + conf.getMaxMemoryAvailable() : testMaxMemoryAvailable); + hashGroupByMemoryAvailableByteLength = (long) (memoryPercentage * maxMemoryAvailable); + } + + /* + * Return the power of 2 that is equal to or next below a value. + * + * Example: + * 100000b = 2^5 = 32 + * where Long.numberOfLeadingZeros returns (64 - 6) = 58 + * and the result = 5. + * + * Replacing any set of lower 0's with 1's doesn't change the result. + * Or, numbers 32 to 63 return 5. + * + */ + public static int floorPowerOf2(long a) { + if (a == 0) { + return 0; + } + final int floorLeadingZerosCount = Long.numberOfLeadingZeros(a); + final int result = Long.SIZE - floorLeadingZerosCount - 1; + return result; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java new file mode 100644 index 0000000..1eaa453 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; + +/** + * This class is common hash operator class of Native Vectorized GroupBy with common operator + * logic for checking key limits and the common process method logic. + */ +public abstract class VectorGroupByHashOperatorBase + extends VectorGroupByHashTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashOperatorBase() { + super(); + } + + public VectorGroupByHashOperatorBase(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + determineInitialHashTableSize(); + + allocateHashTable(); + } + + protected void doBeforeMainLoopWork(final int inputLogicalSize) + throws HiveException, IOException { + + /* + * If the hash table has less than the worst-case inputLogicalSize keys that + * could be added, then flush the current hash table entries and clear it. + */ + checkKeyLimitOncePerBatch(inputLogicalSize); + } + + protected abstract void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException; + + /* + * Common process method that does common work then drives the specialized Operator classes with + * the doBeforeMainLoopWork and doMainLoop overrides. + */ + @Override + public void process(Object row, int tag) throws HiveException { + + try { + VectorizedRowBatch batch = (VectorizedRowBatch) row; + + batchCounter++; + + final int inputLogicalSize = batch.size; + + if (inputLogicalSize == 0) { + return; + } + + /* + * Perform any key expressions. Results will go into scratch columns. 
+ */ + if (groupByKeyExpressions != null) { + for (VectorExpression ve : groupByKeyExpressions) { + ve.evaluate(batch); + } + } + + doBeforeMainLoopWork(inputLogicalSize); + + doMainLoop(batch, inputLogicalSize); + + } catch (Exception e) { + throw new HiveException(e); + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java new file mode 100644 index 0000000..060d960 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java @@ -0,0 +1,350 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is common hash operator class of Native Vectorized GroupBy for the hash tables. + */ +public abstract class VectorGroupByHashTable + extends VectorGroupByHashCommon { + + private static final long serialVersionUID = 1L; + + private static final String CLASS_NAME = VectorGroupByHashTable.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + private boolean isBytesHashTable; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + // How many times we encountered a limit on the hash table and had to flush and recreate. + private long flushAndRecreateCount; + + // Memory available in bytes for the slot table, and when we have bytes keys, the memory available + // for the key store. + protected transient long hashTableMemoryAvailableByteLength; + protected transient long keyStoreMemoryAvailableByteLength; + + // The logical size and power of 2 mask of the hash table + protected transient int logicalHashBucketCount; + protected transient int logicalHashBucketMask; + + // The number of longs in the hash table slot array. It is the logical size * entries per slot. 
+ protected int slotPhysicalArraySize; + + // The maximum number of keys we'll keep in the hash table before flushing. + protected transient int hashTableKeyCountLimit; + + // The slot table with 1, 2, 3, etc longs per entry. + protected transient long[] slotMultiples; + + // The key count and largest number of misses in our quadratic probing style hash table. + // Maintained by the hash table variations. + protected transient int keyCount; + protected transient int largestNumberOfSteps; + + // Byte length for WriteBuffers segments in the VectorKeyStore used for bytes keys + protected transient int keyStoreByteSize; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashTable() { + super(); + } + + public VectorGroupByHashTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + isBytesHashTable = + (this.vectorDesc.getVectorGroupByInfo().getHashTableKeyType() != HashTableKeyType.LONG); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + flushAndRecreateCount = 0; + + divvyUpHashGroupByMemory(); + } + + public long getFlushAndStartOverCount() { + return flushAndRecreateCount; + } + + public abstract int getHashTableMultiple(); + + /* + * Decide how to apportion memory for the slot table, and for the key store when we have bytes + * keys. (Single long keys are stored in the slot table). + */ + private void divvyUpHashGroupByMemory() { + + /* + * CONCERN: + * Do we really want a hash table to use the maximum supplied memory immediately? + * That could waste memory that other operators could use. And, cause Java GC + * issues because of how large the single slot table array is. Large hash tables + * with small keys sets could cause lots of unnecessary cold RAM hits. 
There is a tension + * here, of course. Too small a table and there will be more insert collisions. + * + * In contrast, the current VectorGroupByOperator and GroupByOperator classes use a + * Java HeapMap which automatically grows over time. + * + * The issues here are similar to MapJoin, except we have the possibility of using a smaller + * hash table and flushing everything to Reduce. Then, creating a larger slot table instead + * of zeroing the current one. MapJoin cannot flush -- it either needs to expand its + * hash tables to hold everything or spill some of the data to secondary storage (Hybrid Grace). + */ + + if (isBytesHashTable) { + + // UNDONE: Use key size estimates to make better decision than half... + final long half = hashGroupByMemoryAvailableByteLength / 2; + hashTableMemoryAvailableByteLength = half; + keyStoreMemoryAvailableByteLength = half; + } else { + hashTableMemoryAvailableByteLength = hashGroupByMemoryAvailableByteLength; + keyStoreMemoryAvailableByteLength = 0; + } + } + + //------------------------------------------------------------------------------------------------ + + private static final int LARGEST_NUMBER_OF_STEPS_THRESHOLD = 6; + + public boolean isAboveLargestNumberOfStepsThresold() { + return (largestNumberOfSteps > LARGEST_NUMBER_OF_STEPS_THRESHOLD); + } + + /* + * Do override this method in specialized hash tables that have more to initialize and/or create. + */ + public void allocateHashTable() throws HiveException { + allocateBucketArray(); + } + + /* + * Allocate the key store when we have bytes keys. + */ + public VectorKeyStore allocateVectorKeyStore(VectorKeyStore keyStore) { + if (keyStore == null) { + return new VectorKeyStore(keyStoreByteSize); + } else { + keyStore.clear(); + return keyStore; + } + } + + /* + * When flushing and recreating, release the memory when the slot table is changing size, etc. 
+ */ + public void releaseHashTableMemory() throws HiveException { + if (slotMultiples.length == slotPhysicalArraySize) { + + // Keep it and clear it later. + return; + } + slotMultiples = null; + } + + // Since a maximum integer is 2^N - 2 it cannot be used we need one less than number of + // Integer bits. 2^30 = 1,073,741,824 + private static final int MAX_POWER_OF_2_FOR_INT_INDEXING = Integer.SIZE - 2; + + // An arbitrary factor to divide the slot table size by to get the key count limit. + // Hitting the key count limit will cause the hash table to be flushed to Reduce and cleared + // for refilling. + private static final int KEY_COUNT_FACTOR = 8; + + // Make sure we have comfortable room for at least one batch of new keys to support the + // VectorGroupByHashOperatorBase.checkKeyLimitOncePerBatch method. + private static final int MIN_HASH_TABLE_BYTE_LENGTH = + VectorizedRowBatch.DEFAULT_SIZE * KEY_COUNT_FACTOR * (Long.SIZE / Byte.SIZE); + private static final int MIN_POWER_OF_2 = floorPowerOf2(MIN_HASH_TABLE_BYTE_LENGTH); + + /* + * Determine the size for the slot table and, for bytes keys the key store. + */ + public void determineInitialHashTableSize() throws HiveException { + + /* + * Slot table size. + */ + + final int multiple = getHashTableMultiple(); + + // Take in account our multiple. + final int floorPowerOf2MaxHashTableMemoryByteLength = + floorPowerOf2(hashTableMemoryAvailableByteLength / multiple); + + // No matter how much memory they want to give us, our array is limited to int indexing. + int maxPowerOf2HashTableMemoryByteLength = + Math.min(floorPowerOf2MaxHashTableMemoryByteLength, MAX_POWER_OF_2_FOR_INT_INDEXING); + + // UNDONE: Artificially limit for now... 2^24 = 16,777,216 bytes. 
+ maxPowerOf2HashTableMemoryByteLength = Math.min(maxPowerOf2HashTableMemoryByteLength, 24); + + final int powerOf2HashTableMemoryByteLength = + Math.max(maxPowerOf2HashTableMemoryByteLength, MIN_POWER_OF_2); + + final int hashTableByteSize = (1 << powerOf2HashTableMemoryByteLength); + final int hashTableLongSize = hashTableByteSize / (Long.SIZE / Byte.SIZE); + + logicalHashBucketCount = hashTableLongSize; + + slotPhysicalArraySize = logicalHashBucketCount * multiple; + + /* + * Key store size. + */ + + if (isBytesHashTable) { + final int floorPowerOf2MaxKeyStoreMemoryByteLength = + floorPowerOf2(keyStoreMemoryAvailableByteLength); + + // No matter how much memory they want to give us, our array is limited to int indexing. + int maxPowerOf2KeyStoreMemoryByteLength = + Math.min(floorPowerOf2MaxKeyStoreMemoryByteLength, MAX_POWER_OF_2_FOR_INT_INDEXING); + + keyStoreByteSize = (1 << maxPowerOf2KeyStoreMemoryByteLength); + + // CONSIDER: Better min/max limits. + keyStoreByteSize = Math.min(keyStoreByteSize, 1024 * 1024); + keyStoreByteSize = Math.max(keyStoreByteSize, 128 * 1024); + } + + if (!isBytesHashTable) { + LOG.info( + "Logical slot table size " + logicalHashBucketCount + + " multiple " + multiple); + } else { + LOG.info( + "Logical slot table size " + logicalHashBucketCount + + " multiple " + multiple + + " key store size " + keyStoreByteSize); + } + } + + /* + * When flushing and recreating, release the memory when the slot table is changing size, etc. + */ + public void determineNextHashTableSize() throws HiveException { + // CONSIDER: Growing the hash table size upon examining current hash table. + } + + /* + * For now, we are just allocating the slot table array. + * FUTURE: We'll need to revisit these calculations when we support STRING keys. + */ + protected void allocateBucketArray() { + if (slotMultiples != null) { + + // The releaseHashTableMemory method kept same size array, so just clear it. 
+ Arrays.fill(slotMultiples, 0); + } else { + + logicalHashBucketMask = logicalHashBucketCount - 1; + + hashTableKeyCountLimit = logicalHashBucketCount / KEY_COUNT_FACTOR; + + slotMultiples = new long[slotPhysicalArraySize]; + } + + keyCount = 0; + largestNumberOfSteps = 0; + + if (flushAndRecreateCount != 0) { + LOG.info("FLush and recreate #" + flushAndRecreateCount); + } + } + + /* + * Check the worst case possibility -- adding a new key for each row in the batch -- and flush + * and recreate the hash table. + */ + protected void checkKeyLimitOncePerBatch(final int inputLogicalSize) + throws HiveException, IOException { + + /* + * Check the hash table key limit for doing the worst case of adding all keys outside the + * inner loop for better performance. + */ + final boolean isReachedKeyLimit = + (keyCount + inputLogicalSize > hashTableKeyCountLimit); + if (isReachedKeyLimit || isAboveLargestNumberOfStepsThresold()) { + LOG.info( + "Reached key limit " + isReachedKeyLimit + + ", above largest number of steps thresold " + isAboveLargestNumberOfStepsThresold()); + + flushAndRecreateCount++; + flushAndRecreate(); + if (keyCount + inputLogicalSize > hashTableKeyCountLimit) { + + // Hash table is way too small. + raise2ndHitOutOfStorage(); + } + } + } + + protected void raise2ndHitOutOfStorage() throws HiveException { + throw new HiveException( + "After flushing hash table and clearing, there still isn't enough storage?"); + } + + protected void flushAndRecreate() throws HiveException, IOException { + + /* + * 1) Flush hash table. + * 2) Use current state to determine next sizes. + * 3) Release memory, if necessary. + * 4) Recreate/clear using next sizes. + */ + + flushGroupBy(); + + // Based on current hash table sizes and perhaps historical information, determine + // the size to use next during recreation. 
+ determineNextHashTableSize(); + + releaseHashTableMemory(); + + allocateHashTable(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/duplicatereduction/VectorGroupByHashBytesKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/duplicatereduction/VectorGroupByHashBytesKeyDuplicateReductionTable.java new file mode 100644 index 0000000..9741acb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/duplicatereduction/VectorGroupByHashBytesKeyDuplicateReductionTable.java @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey.duplicatereduction.VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/* + * A single bytes key hash table optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashBytesKeyDuplicateReductionTable + extends VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorKeyStore keyStore; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByHashBytesKeyDuplicateReductionTable() { + super(); + } + + public VectorGroupByHashBytesKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_DUPLICATE_REDUCTION_ENTRY_SIZE; + } + + protected static final int BYTES_DUPLICATE_REDUCTION_ENTRY_SIZE = 2; + + public void findOrCreateBytesDuplicateReductionKey(byte[] keyBytes, int keyStart, int keyLength, + long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int pairIndex; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex] == 0) { + break; + } + if (hashCode == slotMultiples[pairIndex + 1] && + keyStore.unsafeEqualKey(slotMultiples[pairIndex], keyBytes, keyStart, keyLength)) { + // Found it! A duplicate has now been eliminated. + return; + } + // Some other key (collision) - keep probing. 
+ probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + /* + if (isAboveLargestNumberOfStepsThresold()) { + System.out.println( + "*DEBUG* New largestNumberOfSteps " + largestNumberOfSteps + + " logicalHashBucketCount " + logicalHashBucketCount + + " keyCount " + keyCount + + " hashCode 0x" + Integer.toHexString(intHashCode)); + } + */ + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + + // First entry. + slotMultiples[pairIndex] = keyStore.add(keyBytes, keyStart, keyLength); + slotMultiples[pairIndex + 1] = hashCode; + + keyCount++; + + } + + private int countKeyPairIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + + protected int initBytesKeyIterator() { + countKeyPairIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + return keyCount; + } + + // Read next key. + protected void readNext() { + while (true) { + final long keyRef = slotMultiples[countKeyPairIndex]; + if (keyRef != 0) { + keyStore.getKey( + keyRef, + keyByteSegmentRef, + keyReadPos); + + countKeyPairIndex += 2; + return; + } + countKeyPairIndex += 2; + } + } + + public byte[] getKeyBytes() { + return keyByteSegmentRef.getBytes(); + } + + public int getKeyBytesOffset() { + return (int) keyByteSegmentRef.getOffset(); + } + + public int getKeyBytesLength() { + return keyByteSegmentRef.getLength(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/singlecount/VectorGroupByHashBytesKeySingleCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/singlecount/VectorGroupByHashBytesKeySingleCountTable.java new file mode 100644 index 0000000..8b6bf87 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/singlecount/VectorGroupByHashBytesKeySingleCountTable.java @@ -0,0 +1,187 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.singlecount; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey.singlecount.VectorGroupByHashSingleKeySingleCountOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/** + * A single bytes key hash table optimized for a single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashBytesKeySingleCountTable + extends VectorGroupByHashSingleKeySingleCountOperatorBase { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + private transient VectorKeyStore keyStore; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashBytesKeySingleCountTable() { + super(); + } + + public VectorGroupByHashBytesKeySingleCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_ENTRY_SIZE; + } + + protected static final int BYTES_ENTRY_SIZE = 3; + + public void findOrCreateBytesKey(byte[] keyBytes, int keyStart, int keyLength, + long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int tripleIndex; + boolean isNewKey; + while (true) { + tripleIndex = 3 * slot; + if (slotMultiples[tripleIndex] == 0) { + isNewKey = true; + break; + } + if (hashCode == slotMultiples[tripleIndex + 1] && + keyStore.unsafeEqualKey(slotMultiples[tripleIndex], keyBytes, keyStart, keyLength)) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. 
+ probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + /* + if (isAboveLargestNumberOfStepsThresold()) { + System.out.println( + "*DEBUG* New largestNumberOfSteps " + largestNumberOfSteps + + " logicalHashBucketCount " + logicalHashBucketCount + + " keyCount " + keyCount + + " hashCode 0x" + Integer.toHexString(intHashCode)); + } + */ + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + + // First entry. + slotMultiples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); + slotMultiples[tripleIndex + 1] = hashCode; + slotMultiples[tripleIndex + 2] = count; + + keyCount++; + + } else if (count > 0) { + + slotMultiples[tripleIndex + 2] += count; + } + } + + private int countKeyTripleIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + private long currentCountKeyCount; + + protected int initBytesKeyIterator() { + countKeyTripleIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + currentCountKeyCount = 0; + return keyCount; + } + + // Read next key. 
+ protected void readNext() { + while (true) { + final long keyRef = slotMultiples[countKeyTripleIndex]; + if (keyRef != 0) { + keyStore.getKey( + keyRef, + keyByteSegmentRef, + keyReadPos); + currentCountKeyCount = slotMultiples[countKeyTripleIndex + 2]; + + countKeyTripleIndex += 3; + return; + } + countKeyTripleIndex += 3; + } + } + + public byte[] getKeyBytes() { + return keyByteSegmentRef.getBytes(); + } + + public int getKeyBytesOffset() { + return (int) keyByteSegmentRef.getOffset(); + } + + public int getKeyBytesLength() { + return keyByteSegmentRef.getLength(); + } + + public long getCount() { + return currentCountKeyCount; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/duplicatereduction/VectorGroupByHashLongKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/duplicatereduction/VectorGroupByHashLongKeyDuplicateReductionTable.java new file mode 100644 index 0000000..a0a856e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/duplicatereduction/VectorGroupByHashLongKeyDuplicateReductionTable.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.duplicatereduction; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey.duplicatereduction.VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * A single long key hash table optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashLongKeyDuplicateReductionTable + extends VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected boolean haveZeroKey; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByHashLongKeyDuplicateReductionTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashLongKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveZeroKey = false; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return LONG_DUPLICATE_REDUCTION_ENTRY_SIZE; + } + + protected static int LONG_DUPLICATE_REDUCTION_ENTRY_SIZE = 1; + + public void findOrCreateLongDuplicateReductionKey(long key, long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + while (true) { + if (slotMultiples[slot] == 0) { + break; + } + if (key == slotMultiples[slot]) { + // Found it! A duplicate has now been eliminated. + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + /* + if (isAboveLargestNumberOfStepsThresold()) { + System.out.println( + "*DEBUG* New largestNumberOfSteps " + largestNumberOfSteps + + " logicalHashBucketCount " + logicalHashBucketCount + + " keyCount " + keyCount + + " hashCode 0x" + Integer.toHexString(intHashCode)); + } + */ + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + + // Create first-time key. 
+ slotMultiples[slot] = key; + keyCount++; + } + + private int countKeyIndex; + + protected int initLongDuplicateReductionKeyIterator() { + countKeyIndex = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNext() { + while (true) { + long key = slotMultiples[countKeyIndex++]; + if (key != 0) { + return key; + } + } + } + + protected void doOutputLongKeys( + LongColumnVector keyColumnVector) throws HiveException { + + long[] keyVector = keyColumnVector.vector; + + if (haveZeroKey) { + + // Zero key to deal with. + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + keyVector[outputBatch.size++] = 0; + } + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongDuplicateReductionKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNext(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/singlecount/VectorGroupByHashLongKeySingleCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/singlecount/VectorGroupByHashLongKeySingleCountTable.java new file mode 100644 index 0000000..accfa08 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/singlecount/VectorGroupByHashLongKeySingleCountTable.java @@ -0,0 +1,318 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.singlecount; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey.singlecount.VectorGroupByHashSingleKeySingleCountOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Single long key hash table optimized for: + * 1) COUNT(*) Native Vectorized GroupBy. + * 2) COUNT(key-column) and COUNT(non-key-column) + * Native Vectorized GroupBy + */ +public abstract class VectorGroupByHashLongKeySingleCountTable + extends VectorGroupByHashSingleKeySingleCountOperatorBase { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeySingleCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashLongKeySingleCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return LONG_NON_ZERO_COUNT_ENTRY_SIZE; + } + + protected static final int LONG_NON_ZERO_COUNT_ENTRY_SIZE = 2; + + public void findOrCreateLongNonZeroCountKey(long key, long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int pairIndex = 0; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex + 1] == 0) { + isNewKey = true; + break; + } + if (key == slotMultiples[pairIndex]) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. 
+ probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + /* + if (isAboveLargestNumberOfStepsThresold()) { + System.out.println( + "*DEBUG* New largestNumberOfSteps " + largestNumberOfSteps + + " logicalHashBucketCount " + logicalHashBucketCount + + " keyCount " + keyCount + + " hashCode 0x" + Integer.toHexString(intHashCode)); + } + */ + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + slotMultiples[pairIndex] = key; + keyCount++; + slotMultiples[pairIndex + 1] = count; + } else { + slotMultiples[pairIndex + 1] += count; + } + } + + private int nonZeroCountPairIndex; + private long currentNonZeroCount; + + protected int initLongNonZeroCountKeyIterator() { + nonZeroCountPairIndex = 0; + currentNonZeroCount = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNextNonZeroCountKey() { + while (true) { + long count = slotMultiples[nonZeroCountPairIndex + 1]; + if (count > 0) { + currentNonZeroCount = count; + long key = slotMultiples[nonZeroCountPairIndex]; + nonZeroCountPairIndex += 2; + return key; + } + nonZeroCountPairIndex += 2; + } + } + + public long getLongNonZeroCount() { + return currentNonZeroCount; + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one long key non-zero count hash table to the + * output. + */ + protected void outputLongNonZeroKeyAndCountPairs( + LongColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. 
+ int keyCount = initLongNonZeroCountKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNextNonZeroCountKey(); + countVector[i] = getLongNonZeroCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } + + //------------------------------------------------------------------------------------------------ + + private static long LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK = 1L << 63; + + protected static int LONG_ZERO_COUNT_ENTRY_SIZE = 2; + + public void findOrCreateLongZeroCountKey(long key, long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int pairIndex = 0; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex + 1] == 0) { + isNewKey = true; + break; + } + if (key == slotMultiples[pairIndex]) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + if (isAboveLargestNumberOfStepsThresold()) { + System.out.println( + "*DEBUG* New largestNumberOfSteps " + largestNumberOfSteps + + " logicalHashBucketCount " + logicalHashBucketCount + + " keyCount " + keyCount + + " hashCode 0x" + Integer.toHexString(intHashCode)); + } + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + slotMultiples[pairIndex] = key; + keyCount++; + if (count == 0) { + slotMultiples[pairIndex + 1] = LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK; + } else { + slotMultiples[pairIndex + 1] = count; + } + } else if (count > 0) { + + // Only update count when we are leaving 0. 
+ if (slotMultiples[pairIndex + 1] == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) { + slotMultiples[pairIndex + 1] = count; + } else { + slotMultiples[pairIndex + 1] += count; + } + } + } + + private int countKeyPairIndex; + private long currentCountKeyCount; + + protected int initLongZeroCountKeyIterator() { + countKeyPairIndex = 0; + currentCountKeyCount = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNextZeroCountKey() { + while (true) { + long count = slotMultiples[countKeyPairIndex + 1]; + if (count != 0) { + if (count == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) { + currentCountKeyCount = 0; + } else { + currentCountKeyCount = count; + } + long key = slotMultiples[countKeyPairIndex]; + countKeyPairIndex += 2; + return key; + } + countKeyPairIndex += 2; + } + } + + public long getCount() { + return currentCountKeyCount; + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one long key zero count hash table to the + * output. + */ + protected void outputLongZeroCountKeyAndCountPairs( + LongColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. 
+ int keyCount = initLongZeroCountKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + keyIsNull[batchIndex] = false; + keyVector[batchIndex] = getNextZeroCountKey(); + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/duplicatereduction/VectorGroupByHashSerializeKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/duplicatereduction/VectorGroupByHashSerializeKeyDuplicateReductionTable.java new file mode 100644 index 0000000..d6cc41d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/duplicatereduction/VectorGroupByHashSerializeKeyDuplicateReductionTable.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.serializekey.duplicatereduction;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction.VectorGroupByHashBytesKeyDuplicateReductionTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * A single serialized key hash table optimized for duplicate reduction Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashSerializeKeyDuplicateReductionTable
+    extends VectorGroupByHashBytesKeyDuplicateReductionTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int keyColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorDeserializeRow keyVectorDeserializeRow;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+ // + + public VectorGroupByHashSerializeKeyDuplicateReductionTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashSerializeKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + keyVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + typeInfos, + /* useExternalBuffer */ true)); + // Single key is output column 0. + keyVectorDeserializeRow.init(new int[] { 0 }); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string hash table to the output. + */ + protected void doOutputSerializeKeys( + ColumnVector keyColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. 
+ keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/singlecount/VectorGroupByHashSerializeKeySingleCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/singlecount/VectorGroupByHashSerializeKeySingleCountTable.java new file mode 100644 index 0000000..0d819c2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/singlecount/VectorGroupByHashSerializeKeySingleCountTable.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.serializekey.singlecount; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.singlecount.VectorGroupByHashBytesKeySingleCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * An single serialized key hash table optimized for single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashSerializeKeySingleCountTable + extends VectorGroupByHashBytesKeySingleCountTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByHashSerializeKeySingleCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashSerializeKeySingleCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + keyVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + typeInfos, + /* useExternalBuffer */ true)); + // Single key is output column 0. + keyVectorDeserializeRow.init(new int[] { 0 }); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string hash table to the output. + */ + protected void doOutputSerializeKeyAndCountPairs( + ColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. 
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/duplicatereduction/VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/duplicatereduction/VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase.java new file mode 100644 index 0000000..ee149ec --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/duplicatereduction/VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey.duplicatereduction; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; + +/** + * An single key Operator base class optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase() { + super(); + } + + public VectorGrouoByHashSingleKeyDuplicateReductionOperatorBase(CompilationOpContext ctx, + OperatorDesc conf, VectorizationContext vContext, VectorDesc vectorDesc) + throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + ColumnVector keyColumnVector = outputBatch.cols[0]; + + if (haveNullKey) { + + // NULL entry to deal with. + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + outputBatch.size++; + } + + outputSingleKeys(keyColumnVector); + } + + protected abstract void outputSingleKeys( + ColumnVector keyColumnVector) throws HiveException; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/singlecount/VectorGroupByHashSingleKeySingleCountOperatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/singlecount/VectorGroupByHashSingleKeySingleCountOperatorBase.java new file mode 100644 index 0000000..9de3720 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/singlecount/VectorGroupByHashSingleKeySingleCountOperatorBase.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey.singlecount;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase;
+
+/**
+ * A single key Operator base class optimized for single count Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashSingleKeySingleCountOperatorBase
+    extends VectorGroupByHashOperatorBase {
+
+  private static final long serialVersionUID = 1L;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+ //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + protected transient long nullKeyCount; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashSingleKeySingleCountOperatorBase() { + super(); + } + + public VectorGroupByHashSingleKeySingleCountOperatorBase(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + haveNullKey = false; + nullKeyCount = 0; + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + nullKeyCount = 0; + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + ColumnVector keyColumnVector = outputBatch.cols[0]; + + LongColumnVector countKeyColumnVector = (LongColumnVector) outputBatch.cols[1]; + + if (haveNullKey) { + + // NULL entry to deal with. + + // Is the outputBatch already full? 
+ if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + + countKeyColumnVector.isNull[nullBatchIndex] = false; + countKeyColumnVector.vector[nullBatchIndex] = nullKeyCount; + + outputBatch.size++; + } + + outputSingleKeyAndCountPairs(keyColumnVector, countKeyColumnVector); + } + + protected abstract void outputSingleKeyAndCountPairs( + ColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException; +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/duplicatereduction/VectorGroupByHashStringKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/duplicatereduction/VectorGroupByHashStringKeyDuplicateReductionTable.java new file mode 100644 index 0000000..c51f0db --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/duplicatereduction/VectorGroupByHashStringKeyDuplicateReductionTable.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey.duplicatereduction;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction.VectorGroupByHashBytesKeyDuplicateReductionTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+
+/**
+ * A single string key hash table optimized for duplicate reduction Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashStringKeyDuplicateReductionTable
+    extends VectorGroupByHashBytesKeyDuplicateReductionTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int keyColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+ // + + public VectorGroupByHashStringKeyDuplicateReductionTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashStringKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string key zero count hash table to the + * output. + */ + protected void doOutputStringKeys( + BytesColumnVector keyColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. 
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyColumnVector.setRef( + batchIndex, + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + } + outputBatch.size += count; + keyCount -= count; + } + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/singlecount/VectorGroupByHashStringKeySingleCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/singlecount/VectorGroupByHashStringKeySingleCountTable.java new file mode 100644 index 0000000..3c281b6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/singlecount/VectorGroupByHashStringKeySingleCountTable.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey.singlecount; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.singlecount.VectorGroupByHashBytesKeySingleCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * An single string key hash table optimized for single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashStringKeySingleCountTable + extends VectorGroupByHashBytesKeySingleCountTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByHashStringKeySingleCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashStringKeySingleCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string key zero count hash table to the + * output. + */ + protected void doOutputStringKeyAndCountPairs( + BytesColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. 
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyColumnVector.setRef( + batchIndex, + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java similarity index 80% rename from ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java rename to ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java index b6684e0..9602118 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java @@ -16,18 +16,17 @@ * limitations under the License. */ -package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +package org.apache.hadoop.hive.ql.exec.vector.keystore; import org.apache.hadoop.hive.common.MemoryEstimate; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; -// Optimized for sequential key lookup. - -public class VectorMapJoinFastKeyStore implements MemoryEstimate { +/** + * Optimized for sequential key lookup. 
+ */ - private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastKeyStore.class.getName()); +public class VectorKeyStore implements MemoryEstimate { private WriteBuffers writeBuffers; @@ -124,13 +123,11 @@ public boolean unsafeEqualKey(long keyRefWord, byte[] keyBytes, int keyStart, in public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyLength, WriteBuffers.Position readPos) { - int storedKeyLengthLength = + int storedKeyLength = (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); - boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn); - - // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord)); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); - if (isKeyLengthSmall && storedKeyLengthLength != keyLength) { + if (isKeyLengthSmall && storedKeyLength != keyLength) { return false; } long absoluteKeyOffset = @@ -139,9 +136,8 @@ public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyL writeBuffers.setReadPoint(absoluteKeyOffset, readPos); if (!isKeyLengthSmall) { // Read big value length we wrote with the value. 
- storedKeyLengthLength = writeBuffers.readVInt(readPos); - if (storedKeyLengthLength != keyLength) { - // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length"); + storedKeyLength = writeBuffers.readVInt(readPos); + if (storedKeyLength != keyLength) { return false; } } @@ -152,21 +148,25 @@ public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyL return false; } - // LOG.debug("VectorMapJoinFastKeyStore equalKey match on bytes"); return true; } - public VectorMapJoinFastKeyStore(int writeBuffersSize) { + public VectorKeyStore(int writeBuffersSize) { writeBuffers = new WriteBuffers(writeBuffersSize, AbsoluteKeyOffset.maxSize); unsafeReadPos = new WriteBuffers.Position(); } - public VectorMapJoinFastKeyStore(WriteBuffers writeBuffers) { + public VectorKeyStore(WriteBuffers writeBuffers) { // TODO: Check if maximum size compatible with AbsoluteKeyOffset.maxSize. this.writeBuffers = writeBuffers; unsafeReadPos = new WriteBuffers.Position(); } + public void clear() { + writeBuffers.clear(); + unsafeReadPos.clear(); + } + @Override public long getEstimatedMemorySize() { long size = 0; @@ -174,4 +174,23 @@ public long getEstimatedMemorySize() { size += unsafeReadPos == null ? 0 : unsafeReadPos.getEstimatedMemorySize(); return size; } + + public void getKey(long keyRefWord, ByteSegmentRef keyByteSegmentRef, + WriteBuffers.Position readPos) { + + int storedKeyLength = + (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); + + long absoluteKeyOffset = + (keyRefWord & AbsoluteKeyOffset.bitMask); + + writeBuffers.setReadPoint(absoluteKeyOffset, readPos); + if (!isKeyLengthSmall) { + // Read big value length we wrote with the value. 
+ storedKeyLength = writeBuffers.readVInt(readPos); + } + writeBuffers.getByteSegmentRefToCurrent(keyByteSegmentRef, storedKeyLength, readPos); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index 57db136..d251aa5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -23,6 +23,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -105,7 +106,7 @@ public VectorMapJoinFastBytesHashMap( valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); // Share the same write buffers with our value store. 
- keyStore = new VectorMapJoinFastKeyStore(valueStore.writeBuffers()); + keyStore = new VectorKeyStore(valueStore.writeBuffers()); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 726fd29..b284a83 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -23,6 +23,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -57,10 +58,8 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Count. - // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. 
- // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2]++; } } @@ -95,7 +94,7 @@ public VectorMapJoinFastBytesHashMultiSet( int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize); + keyStore = new VectorKeyStore(writeBuffersSize); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 5d750a8..52801e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -21,6 +21,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; import org.apache.hadoop.io.BytesWritable; @@ -82,7 +83,7 @@ public VectorMapJoinFastBytesHashSet( int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize); + keyStore = new VectorKeyStore(writeBuffersSize); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index f2b794f..15dd125 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashTable; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.WriteBuffers; @@ -40,7 +41,7 @@ private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastBytesHashTable.class); - protected VectorMapJoinFastKeyStore keyStore; + protected VectorKeyStore keyStore; protected BytesWritable testKeyBytesWritable; @@ -68,15 +69,13 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr int i = 0; boolean isNewKey; while (true) { - int tripleIndex = 3 * slot; + final int tripleIndex = 3 * slot; if (slotTriples[tripleIndex] == 0) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty"); isNewKey = true;; break; } if (hashCode == slotTriples[tripleIndex + 1] && keyStore.unsafeEqualKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing"); isNewKey = false; break; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index d3fbf07..169087e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
@@ -93,6 +93,18 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeySingleCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSerializeKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSerializeKeySingleCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeySingleCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeySingleCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSerializeKeySingleCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeySingleCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeySingleCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSerializeKeySingleCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeySingleCountStarOperator; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -128,9 +140,13 @@ import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; import 
org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc; import org.apache.hadoop.hive.ql.plan.VectorPTFInfo; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation.SingleCountAggregationKind; import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; import org.apache.hadoop.hive.ql.plan.VectorizationCondition; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; @@ -224,6 +240,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.mapred.InputFormat; @@ -303,6 +320,12 @@ private VectorizationEnabledOverride vectorizationEnabledOverride; boolean isTestForcedVectorizationEnable; + boolean isVectorizationGroupByNativeEnabled; + private VectorizationEnabledOverride vectorizationGroupByNativeEnabledOverride; + boolean isTestForcedVectorizationGroupByNativeEnable; + boolean weCanAttemptGroupByNativeVectorization; + int testGroupByMaxMemoryAvailable; + private boolean useVectorizedInputFileFormat; private boolean useVectorDeserialize; private boolean useRowDeserialize; @@ -2220,6 +2243,44 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE return physicalContext; } + // Native Vector GROUP BY. 
+ isVectorizationGroupByNativeEnabled = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED); + + final String testVectorizationGroupByNativeOverrideString = + HiveConf.getVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE); + vectorizationGroupByNativeEnabledOverride = + VectorizationEnabledOverride.nameMap.get(testVectorizationGroupByNativeOverrideString); + + isTestForcedVectorizationGroupByNativeEnable = false; + switch (vectorizationGroupByNativeEnabledOverride) { + case NONE: + weCanAttemptGroupByNativeVectorization = isVectorizationGroupByNativeEnabled; + break; + case DISABLE: + weCanAttemptGroupByNativeVectorization = false; + break; + case ENABLE: + weCanAttemptGroupByNativeVectorization = true; + isTestForcedVectorizationGroupByNativeEnable = !isVectorizationGroupByNativeEnabled; + + // Different parts of the code rely on this being set... + HiveConf.setBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED, true); + isVectorizationGroupByNativeEnabled = true; + break; + default: + throw new RuntimeException("Unexpected vectorization enabled override " + + vectorizationGroupByNativeEnabledOverride); + } + + testGroupByMaxMemoryAvailable = + HiveConf.getIntVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE); + + // Input Format control. 
useVectorizedInputFileFormat = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT); @@ -3662,6 +3723,261 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi return result; } + private Operator specializeGroupByOperator( + Operator op, VectorizationContext vContext, + GroupByDesc desc, VectorGroupByDesc vectorDesc) + throws HiveException { + + VectorGroupByInfo vectorGroupByInfo = vectorDesc.getVectorGroupByInfo(); + + Operator vectorOp = null; + Class> opClass = null; + + VectorGroupByInfo.HashTableKeyType hashTableKeyType = + vectorGroupByInfo.getHashTableKeyType(); + + AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + switch (aggregationVariation) { + case HASH_DUPLICATE_REDUCTION: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeyDuplicateReductionOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeyDuplicateReductionOperator.class; + break; + case SERIALIZE: + opClass = VectorGroupByHashSerializeKeyDuplicateReductionOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + + case HASH_SINGLE_COUNT: + { + SingleCountAggregationKind singleCountAggregationKind = + vectorGroupByInfo.getSingleCountAggregation().getSingleCountAggregationKind(); + + switch (singleCountAggregationKind) { + case COUNT_STAR: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeySingleCountStarOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeySingleCountStarOperator.class; + break; + case SERIALIZE: + opClass = VectorGroupByHashSerializeKeySingleCountStarOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + case COUNT_KEY: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeySingleCountKeyOperator.class; + 
break; + case STRING: + opClass = VectorGroupByHashStringKeySingleCountKeyOperator.class; + break; + case SERIALIZE: + opClass = VectorGroupByHashSerializeKeySingleCountKeyOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + case COUNT_COLUMN: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeySingleCountColumnOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeySingleCountColumnOperator.class; + break; + case SERIALIZE: + opClass = VectorGroupByHashSerializeKeySingleCountColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + default: + throw new RuntimeException( + "Unexpected single count aggregation kind " + singleCountAggregationKind); + } + } + break; + + default: + throw new RuntimeException("Unexpected aggregation variation " + aggregationVariation); + } + + vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + vectorDesc.setIsNative(true); + + vectorOp = OperatorFactory.getVectorOperator( + opClass, op.getCompilationOpContext(), desc, vContext, vectorDesc); + LOG.info("Vectorizer vectorizeOperator group by class " + vectorOp.getClass().getSimpleName()); + + return vectorOp; + } + + private boolean canSpecializeGroupBy(GroupByDesc desc, VectorGroupByDesc vectorDesc, + boolean isTezOrSpark, VectorizationContext vContext) throws HiveException { + + String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); + + VectorGroupByInfo vectorGroupByInfo = new VectorGroupByInfo(); + + List vectorizationIssueList = new ArrayList(); + + List keyDescs = desc.getKeys(); + final boolean isEmptyKey = keyDescs.isEmpty(); + final int outputKeyLength = keyDescs.size(); + + GroupByDesc.Mode groupByMode = desc.getMode(); + ProcessingMode processingMode = vectorDesc.getProcessingMode(); + + VectorExpression[] vecKeyExprs = 
vectorDesc.getKeyExpressions(); + final int vecKeyExprSize = vecKeyExprs.length; + + VectorAggregationDesc[] vecAggrDescs = vectorDesc.getVecAggrDescs(); + final int vecAggrDescSize = (vecAggrDescs == null ? 0 : vecAggrDescs.length); + + List aggrDescList = desc.getAggregators(); + + boolean isHash = (groupByMode == GroupByDesc.Mode.HASH); + final AggregationVariation aggregationVariation; + + SingleCountAggregation singleCountAggregation = null; + + if (isHash && vecAggrDescSize == 0) { + + // No aggregations just means the key is being grouped. We are getting rid of duplicate keys. + + aggregationVariation = AggregationVariation.HASH_DUPLICATE_REDUCTION; + singleCountAggregation = null; + + } else if (isHash && vecKeyExprSize == 1 && vecAggrDescSize == 1 && + aggrDescList.get(0).getGenericUDAFName().equalsIgnoreCase("count")) { + + // Single COUNT aggregation specialization. Store key and count in hash table without a + // hash element. + + AggregationDesc countAggrDesc = aggrDescList.get(0); + List countParamList = countAggrDesc.getParameters(); + final int countParamSize = countParamList.size(); + if (countParamSize == 0) { + + // COUNT(*) + + aggregationVariation = AggregationVariation.HASH_SINGLE_COUNT; + singleCountAggregation = + new SingleCountAggregation(SingleCountAggregationKind.COUNT_STAR); + + } else if (countParamSize == 1) { + + aggregationVariation = AggregationVariation.HASH_SINGLE_COUNT; + + VectorAggregationDesc countVecAggrDesc = vecAggrDescs[0]; + + final int inputColumnNum = countVecAggrDesc.getInputExpression().getOutputColumnNum(); + + boolean isKey = false; + for (VectorExpression vecKeyExpr : vecKeyExprs) { + if (vecKeyExpr.getOutputColumnNum() == inputColumnNum) { + isKey = true; + break; + } + } + if (isKey) { + singleCountAggregation = + new SingleCountAggregation(SingleCountAggregationKind.COUNT_KEY); + } else { + singleCountAggregation = + new SingleCountAggregation(SingleCountAggregationKind.COUNT_COLUMN, inputColumnNum); + } + } 
else { + + aggregationVariation = AggregationVariation.NONE; + + vectorizationIssueList.add( + "Cannot specialize aggregation function " + countAggrDesc.getGenericUDAFName() + + " that has more than 1 input parameter"); + } + + } else { + + // FUTURE: More aggregations. + aggregationVariation = AggregationVariation.NONE; + } + + // TEMPORARY: Restriction + boolean isSingleColumnKey = (vecKeyExprSize == 1); + + VectorGroupByInfo.HashTableKeyType hashTableKeyType = VectorGroupByInfo.HashTableKeyType.NONE; + if (isSingleColumnKey) { + ColumnVector.Type colVectorType = vecKeyExprs[0].getOutputColumnVectorType(); + switch (colVectorType) { + case LONG: + + // Integer family, date, interval year month. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.LONG; + break; + case BYTES: + + // String family. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.STRING; + break; + default: + + // All other data types get serialized. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.SERIALIZE; + break; + } + } + + vectorGroupByInfo.setIsVectorizationGroupByNativeEnabled( + weCanAttemptGroupByNativeVectorization); + vectorGroupByInfo.setEngine(engine); + + // Temporary restrictions... + vectorGroupByInfo.setIsSingleKeyColumn(isSingleColumnKey); + + vectorGroupByInfo.setVectorizationIssueList(vectorizationIssueList); + + vectorGroupByInfo.setAggregationVariation(aggregationVariation); + vectorGroupByInfo.setSingleCountAggregation(singleCountAggregation); + + vectorGroupByInfo.setHashTableKeyType(hashTableKeyType); + + vectorGroupByInfo.setTestGroupByMaxMemoryAvailable(testGroupByMaxMemoryAvailable); + + // So EXPLAIN VECTORIZATION can show native conditions, etc. 
+ vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + if (!weCanAttemptGroupByNativeVectorization || + !isTezOrSpark || + !isSingleColumnKey || + (aggregationVariation == AggregationVariation.NONE) || + groupByMode != GroupByDesc.Mode.HASH || + desc.isGroupingSetsPresent() || + vectorizationIssueList.size() > 0) { + return false; + } + + return true; + } + private Operator specializeReduceSinkOperator( Operator op, VectorizationContext vContext, ReduceSinkDesc desc, VectorReduceSinkDesc vectorDesc) throws HiveException { @@ -4232,16 +4548,30 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { Operator groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { - ImmutablePair,String> pair = + String issue = + doVectorizeGroupByOperatorPreparation( + groupByOp, vContext, vectorGroupByDesc); + Preconditions.checkState(issue == null); + return doVectorizeGroupByOperator( groupByOp, vContext, vectorGroupByDesc); - return pair.left; + } + + private static Operator doVectorizeGroupByOperator( + Operator groupByOp, VectorizationContext vContext, + VectorGroupByDesc vectorGroupByDesc) + throws HiveException { + Operator vectorOp = + OperatorFactory.getVectorOperator( + groupByOp.getCompilationOpContext(), (GroupByDesc) groupByOp.getConf(), + vContext, vectorGroupByDesc); + return vectorOp; } /* * NOTE: The VectorGroupByDesc has already been allocated and will be updated here. 
*/ - private static ImmutablePair,String> doVectorizeGroupByOperator( + private static String doVectorizeGroupByOperatorPreparation( Operator groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { @@ -4263,7 +4593,7 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { ImmutablePair pair = getVectorAggregationDesc(aggDesc, vContext); if (pair.left == null) { - return new ImmutablePair, String>(null, pair.right); + return pair.right; } vecAggrDescs[i] = pair.left; @@ -4274,14 +4604,9 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { vectorGroupByDesc.setKeyExpressions(vecKeyExpressions); vectorGroupByDesc.setVecAggrDescs(vecAggrDescs); vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns); - Operator vectorOp = - OperatorFactory.getVectorOperator( - groupByOp.getCompilationOpContext(), groupByDesc, - vContext, vectorGroupByDesc); - return new ImmutablePair, String>(vectorOp, null); - } - static int fake; + return null; // No issue. 
+ } public static Operator vectorizeSelectOperator( Operator selectOp, VectorizationContext vContext, @@ -4830,23 +5155,40 @@ private static VectorPTFInfo createVectorPTFInfo(Operator,String> pair = - doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); - if (pair.left == null) { - setOperatorIssue(pair.right); + String issue = + doVectorizeGroupByOperatorPreparation(op, vContext, vectorGroupByDesc); + if (issue != null) { + setOperatorIssue(issue); throw new VectorizerCannotVectorizeException(); } - vectorOp = pair.left; - isNative = false; + + GroupByDesc groupByDesc = (GroupByDesc) op.getConf(); + boolean specialize = + canSpecializeGroupBy(groupByDesc, vectorGroupByDesc, isTezOrSpark, vContext); + + if (!specialize) { + + vectorOp = + doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); + isNative = false; + + } else { + + vectorOp = + specializeGroupByOperator(op, vContext, groupByDesc, vectorGroupByDesc); + isNative = true; + } if (vectorTaskColumnInfo != null) { VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions(); if (usesVectorUDFAdaptor(vecKeyExpressions)) { vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); - for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { - if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { - vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + if (vecAggrDescs != null) { + for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { + if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 31237c8..db31210 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -20,8 +20,10 @@ import java.util.ArrayList; import 
java.util.Arrays; +import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; @@ -31,7 +33,10 @@ import org.apache.hive.common.util.AnnotationUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; - +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation.SingleCountAggregationKind; /** * GroupByDesc. @@ -324,26 +329,38 @@ public Object clone() { this.groupingSetPosition, this.isDistinct); } + // Use LinkedHashSet to give predictable display order. + private static final Set vectorizableGroupByNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { private final GroupByDesc groupByDesc; private final VectorGroupByDesc vectorGroupByDesc; + private final VectorGroupByInfo vectorGroupByInfo; + + private VectorizationCondition[] nativeConditions; public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorGroupByDesc vectorGroupByDesc) { - // Native vectorization not supported. 
- super(vectorGroupByDesc, false); + super(vectorGroupByDesc, vectorGroupByDesc.isNative()); this.groupByDesc = groupByDesc; this.vectorGroupByDesc = vectorGroupByDesc; + vectorGroupByInfo = vectorGroupByDesc.getVectorGroupByInfo(); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getKeysExpression() { return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions()); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getAggregators() { + if (isNative) { + return null; + } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); List vecAggrList = new ArrayList(vecAggrDescs.length); for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { @@ -352,17 +369,20 @@ public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, return vecAggrList; } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProcessingMode() { return vectorGroupByDesc.getProcessingMode().name(); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getGroupByMode() { return groupByDesc.getMode().name(); } - @Explain(vectorization = 
Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getVectorOutputConditionsNotMet() { List results = new ArrayList(); @@ -379,13 +399,113 @@ public String getGroupByMode() { return results; } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProjectedOutputColumnNums() { return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns()); } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorGroupByInfo.getIsVectorizationGroupByNativeEnabled(); + + String engine = vectorGroupByInfo.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableGroupByNativeEngines; + boolean engineInSupported = vectorizableGroupByNativeEngines.contains(engine); + + final List vectorizationIssueList = vectorGroupByInfo.getVectorizationIssueList(); + + List conditionList = new ArrayList(); + conditionList.add( + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED.varname)); + conditionList.add( + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName)); + conditionList.add( + new VectorizationCondition( + vectorGroupByInfo.getIsSingleKeyColumn(), + "Single Key Column")); + AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + conditionList.add( + new VectorizationCondition( + (aggregationVariation == AggregationVariation.HASH_SINGLE_COUNT || + aggregationVariation == 
AggregationVariation.HASH_DUPLICATE_REDUCTION), + "Single COUNT aggregation or Duplicate Reduction")); + conditionList.add( + new VectorizationCondition( + (vectorGroupByDesc.getProcessingMode() == ProcessingMode.HASH), + "Group By Mode HASH")); + conditionList.add( + new VectorizationCondition( + !groupByDesc.isGroupingSetsPresent(), + "No Grouping Sets")); + if (vectorizationIssueList.size() != 0) { + conditionList.add( + new VectorizationCondition( + true, + "Has issues \"" + + vectorizationIssueList.toString() + "\"")); + } + + VectorizationCondition[] conditions = + conditionList.toArray(new VectorizationCondition[0]); + + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences. + if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) { + return null; + } + + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences. 
+ if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) { + return null; + } + + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "singleCountAggreation", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getSingleCountAggreation() { + if (!isNative) { + return null; + } + final SingleCountAggregationKind singleCountAggregationKind = + vectorGroupByInfo.getSingleCountAggregation().getSingleCountAggregationKind(); + if (singleCountAggregationKind == SingleCountAggregationKind.NONE) { + return null; + } + return singleCountAggregationKind.name(); + } } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public GroupByOperatorExplainVectorization getGroupByVectorization() { VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) getVectorDesc(); if (vectorGroupByDesc == null) { @@ -404,11 +524,14 @@ public static String getComplexTypeEnabledCondition( public static String getComplexTypeWithGroupByEnabledCondition( boolean isVectorizationComplexTypesEnabled, boolean isVectorizationGroupByComplexTypesEnabled) { - final boolean enabled = (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled); + final boolean enabled = + (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled); return "(" + - HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationComplexTypesEnabled + + HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " + + isVectorizationComplexTypesEnabled + " AND " + - 
HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationGroupByComplexTypesEnabled + + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " + + isVectorizationGroupByComplexTypesEnabled + ") IS " + enabled; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index caf0c67..b7e60f7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -67,8 +67,12 @@ private boolean isVectorizationComplexTypesEnabled; private boolean isVectorizationGroupByComplexTypesEnabled; + private boolean isNative; + private VectorGroupByInfo vectorGroupByInfo; + public VectorGroupByDesc() { - this.processingMode = ProcessingMode.NONE; + processingMode = ProcessingMode.NONE; + isNative = false; } public void setProcessingMode(ProcessingMode processingMode) { @@ -78,6 +82,14 @@ public ProcessingMode getProcessingMode() { return processingMode; } + public void setIsNative(boolean isNative) { + this.isNative = isNative; + } + + public boolean isNative() { + return isNative; + } + public void setKeyExpressions(VectorExpression[] keyExpressions) { this.keyExpressions = keyExpressions; } @@ -118,6 +130,14 @@ public boolean getIsVectorizationGroupByComplexTypesEnabled() { return isVectorizationGroupByComplexTypesEnabled; } + public void setVectorGroupByInfo(VectorGroupByInfo vectorGroupByInfo) { + this.vectorGroupByInfo = vectorGroupByInfo; + } + + public VectorGroupByInfo getVectorGroupByInfo() { + return vectorGroupByInfo; + } + /** * Which ProcessingMode for VectorGroupByOperator? 
* diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java new file mode 100644 index 0000000..994a916 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java @@ -0,0 +1,195 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +import com.google.common.base.Preconditions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; + +/** + * VectorGroupByInfo. 
+ * + * A convenience data structure that has information needed to vectorize group by. + * + * It is created by the Vectorizer when it is determining whether it can specialize so the + * information doesn't have to be recreated again and agains by the VectorGroupByOperator's + * constructors and later during execution. + */ +public class VectorGroupByInfo { + + private static long serialVersionUID = 1L; + + public static enum HashTableKeyType { + NONE, + LONG, + STRING, + SERIALIZE + } + + //------------------------------------------------------------------------------------------------ + + public static enum AggregationVariation { + NONE, + HASH_SINGLE_COUNT, + HASH_DUPLICATE_REDUCTION + } + + public static class SingleCountAggregation { + + public enum SingleCountAggregationKind { + NONE, + COUNT_STAR, + COUNT_KEY, + COUNT_COLUMN + } + + private final SingleCountAggregationKind singleCountAggregationKind; + private final int countColumnNum; + + public SingleCountAggregation(SingleCountAggregationKind singleCountAggregationKind) { + this.singleCountAggregationKind = singleCountAggregationKind; + countColumnNum = -1; + } + + public SingleCountAggregation(SingleCountAggregationKind singleCountAggregationKind, + int countColumnNum) { + this.singleCountAggregationKind = singleCountAggregationKind; + this.countColumnNum = countColumnNum; + } + + public SingleCountAggregationKind getSingleCountAggregationKind() { + return singleCountAggregationKind; + } + + public int getCountColumnNum() { + return countColumnNum; + } + } + + //--------------------------------------------------------------------------- + + private boolean isVectorizationGroupByNativeEnabled; + private String engine; + + // Temporary restrictions... 
+ private boolean isSingleColumnKey; + + private List vectorizationIssueList; + + private AggregationVariation aggregationVariation; + private SingleCountAggregation singleCountAggregation; + + private HashTableKeyType hashTableKeyType; + + private int testGroupByMaxMemoryAvailable; + + public VectorGroupByInfo() { + isVectorizationGroupByNativeEnabled = false; + + isSingleColumnKey = false; + + vectorizationIssueList = null; + + hashTableKeyType = HashTableKeyType.NONE; + + testGroupByMaxMemoryAvailable = -1; + } + + public boolean getIsVectorizationGroupByNativeEnabled() { + return isVectorizationGroupByNativeEnabled; + } + + public void setIsVectorizationGroupByNativeEnabled(boolean isVectorizationGroupByNativeEnabled) { + this.isVectorizationGroupByNativeEnabled = isVectorizationGroupByNativeEnabled; + } + + public String getEngine() { + return engine; + } + + public void setEngine(String engine) { + this.engine = engine; + } + + public boolean getIsSingleKeyColumn() { + return isSingleColumnKey; + } + + public void setIsSingleKeyColumn(boolean isSingleColumnKey) { + this.isSingleColumnKey = isSingleColumnKey; + } + + public List getVectorizationIssueList() { + return vectorizationIssueList; + } + + public void setVectorizationIssueList(List vectorizationIssueList) { + this.vectorizationIssueList = vectorizationIssueList; + } + + public void setAggregationVariation(AggregationVariation aggregationVariation) { + this.aggregationVariation = aggregationVariation; + } + + public AggregationVariation getAggregationVariation() { + return aggregationVariation; + } + + public void setSingleCountAggregation(SingleCountAggregation singleCountAggregation) { + this.singleCountAggregation = singleCountAggregation; + } + + public SingleCountAggregation getSingleCountAggregation() { + return singleCountAggregation; + } + + public HashTableKeyType getHashTableKeyType() { + return hashTableKeyType; + } + + public void setHashTableKeyType(HashTableKeyType hashTableKeyType) { + 
this.hashTableKeyType = hashTableKeyType; + } + + public int getTestGroupByMaxMemoryAvailable() { + return testGroupByMaxMemoryAvailable; + } + + public void setTestGroupByMaxMemoryAvailable(int testGroupByMaxMemoryAvailable) { + this.testGroupByMaxMemoryAvailable = testGroupByMaxMemoryAvailable; + } +} diff --git ql/src/test/queries/clientpositive/vector_count_simple.q ql/src/test/queries/clientpositive/vector_count_simple.q new file mode 100644 index 0000000..bdcf023 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_count_simple.q @@ -0,0 +1,485 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.groupby.native.enabled=true; +-- We want to create selectedInUse batches with WHERE expressions. +SET hive.optimize.ppd=false; + +-- SORT_QUERY_RESULTS + + + +CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt; +CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt; + +-- Add a single NULL row that will come from ORC as isRepeated. +insert into groupby_long_1a values (NULL); + +-- And, a single non-NULL key already in the table and one that isn't row that will come +-- from ORC as isRepeated, too. 
+insert into groupby_long_1a values (-5206670856103795573); +insert into groupby_long_1a values (800); + +CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt; +CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt; + +insert into groupby_long_1a_nonull values (-6187919478609154811); +insert into groupby_long_1a_nonull values (1000); + + + +CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt; +CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt; + +insert into groupby_long_1b values (NULL); + +insert into groupby_long_1b values (32030); +insert into groupby_long_1b values (800); + +CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt; +CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt; + +insert into groupby_long_1b_nonull values (31713); +insert into groupby_long_1b_nonull values (34); + + + +CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt; +CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt; + +insert into groupby_long_1c values (NULL, NULL); +insert into groupby_long_1c values (NULL, 'TKTKGVGFW'); +insert into groupby_long_1c values (NULL, 'NEW'); + +CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH 
'../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt; +CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt; + +insert into groupby_long_1c values (1928928239, NULL); +insert into groupby_long_1c values (9999, 'NEW'); + + + +-- *_long_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1a group by key; +select key, count(key) from groupby_long_1a group by key; +select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1a group by key; +select key, count(*) from groupby_long_1a group by key; +select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key; + +-- *_long_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1a_nonull group by key; +select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1a_nonull group by key; +select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key; + +-- *_long_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1b group by key; +select key, count(key) from groupby_long_1b group by key; +select key, count(key) from groupby_long_1b where key != 32030 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1b group by key; +select key, count(*) from groupby_long_1b group by key; +select key, count(*) from groupby_long_1b where key != 32030 group by key; + +-- *_long_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1b_nonull group by key; +select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1b_nonull group by key; +select key, 
count(*) from groupby_long_1b_nonull where key != 32030 group by key; + +-- *_long_1c + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1c group by key; +select key, count(key) from groupby_long_1c group by key; +select key, count(key) from groupby_long_1c where key != -1437463633 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1c group by key; +select key, count(*) from groupby_long_1c group by key; +select key, count(*) from groupby_long_1c where key != -1437463633 group by key; + +-- COUNT_COLUMN +explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key; +select key, count(b_string) from groupby_long_1c group by key; +select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key; + +-- *_long_1c_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1c_nonull group by key; +select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1c_nonull group by key; +select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key; + +-- COUNT_COLUMN +select key, count(b_string) from groupby_long_1c_nonull group by key; +select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key; + + + +CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt; +CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt; + +-- Add a single NULL row that will come from ORC as isRepeated. +insert into groupby_string_1a values (NULL); + +-- And, a single non-NULL key already in the table and one that isn't row that will come +-- from ORC as isRepeated, too. 
+insert into groupby_string_1a values ('QNCYBDW'); +insert into groupby_string_1a values ('NOT'); + +CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt; +CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt; + +insert into groupby_string_1a_nonull values ('PXLD'); +insert into groupby_string_1a_nonull values ('AA'); + +-- Use same data as 1a. +CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt; +CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt; + +insert into groupby_string_1a values (NULL); + +insert into groupby_string_1a values ('QNCYBDW'); +insert into groupby_string_1a values ('NOT'); + +CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt; +CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt; + +insert into groupby_string_1b_nonull values ('PXLD'); +insert into groupby_string_1b_nonull values ('AA'); + +CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt; +CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt; + +insert into groupby_string_1c values (NULL, NULL, NULL); +insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c values (NULL, '2018-04-11', NULL); + +insert 
into groupby_string_1c values ('ATZJTPECF', NULL, NULL); +insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL); + +insert into groupby_string_1c values ('BB', NULL, NULL); +insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c values ('DD', '2018-04-14', NULL); + +CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt; +CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt; + +insert into groupby_string_1c_nonull values ('SDA', NULL, NULL); +insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL); + +insert into groupby_string_1c_nonull values ('EEE', NULL, NULL); +insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888'); +insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL); + +-- *_string_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1a group by key; +select key, count(key) from groupby_string_1a group by key; +select key, count(key) from groupby_string_1a where key != 'PXLD' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1a group by key; +select key, count(*) from groupby_string_1a group by key; +select key, count(*) from groupby_string_1a where key != 'PXLD' group by key; + +-- *_string_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_string_1a_nonull group by key; +select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' 
group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1a_nonull group by key; +select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key; + +-- *_string_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1b group by key; +select key, count(key) from groupby_string_1b group by key; +select key, count(key) from groupby_string_1b where key != 'MXGD' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1b group by key; +select key, count(*) from groupby_string_1b group by key; +select key, count(*) from groupby_string_1b where key != 'MXGD' group by key; + +-- *_string_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_string_1b_nonull group by key; +select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1b_nonull group by key; +select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key; + +-- *_string_1c + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1c group by key; +select key, count(key) from groupby_string_1c group by key; +select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1c group by key; +select key, count(*) from groupby_string_1c group by key; +select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_date +explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key; +select key, count(s_date) from groupby_string_1c group by key; +select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_timestamp +explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key; 
+select key, count(s_timestamp) from groupby_string_1c group by key; +select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- *_string_1c_nonull + +-- COUNT_KEY +select key, count(key) from groupby_string_1c_nonull group by key; +select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1c_nonull group by key; +select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_date +select key, count(s_date) from groupby_string_1c_nonull group by key; +select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_timestamp +select key, count(s_timestamp) from groupby_string_1c_nonull group by key; +select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + + + + +CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt; +CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt; + +CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt; +CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt; + + +CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt; +CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt; + +CREATE TABLE 
groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt; +CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt; + + +-- *_serialize_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key; +select key, count(key) from groupby_serialize_1a group by key; +select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key; +select key, count(*) from groupby_serialize_1a group by key; +select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key; + +-- *_serialize_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_serialize_1a_nonull group by key; +select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_serialize_1a_nonull group by key; +select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key; + + +-- *_serialize_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key; +select key, count(key) from groupby_serialize_1b group by key; +select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key; +select key, count(*) from groupby_serialize_1b group by key; +select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN 
c_smallint +explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key; +select key, count(c_smallint) from groupby_serialize_1b group by key; +select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_string +explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key; +select key, count(c_string) from groupby_serialize_1b group by key; +select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- *_serialize_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_serialize_1b_nonull group by key; +select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_serialize_1b_nonull group by key; +select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_smallint +select key, count(c_smallint) from groupby_serialize_1b_nonull group by key; +select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_string +select key, count(c_string) from groupby_serialize_1b_nonull group by key; +select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key; + +------------------------------------------------------------------------------------------ + +CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k; + +-- STRING +explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10; +select 
s, count(s) from over10k group by s order by s limit 10; + +explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10; +select s, count(ts) from over10k group by s order by s limit 10; + +explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10; +select s, count(*) from over10k group by s order by s limit 10; + +-- SERIALIZE TIMESTAMP +explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10; +select ts, count(ts) from over10k group by ts order by ts limit 10; + +explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10; +select ts, count(d) from over10k group by ts order by ts limit 10; + +explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10; +select ts, count(*) from over10k group by ts order by ts limit 10; + +-- SERIALIZE DECIMAL +explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10; + +explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10; + +explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10; + + +set hive.test.vectorized.groupby.native.max.memory.available=1024; + +explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10; +select i, count(i) from over10k group by i order by i limit 10; + +explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10; +select i, count(b) from over10k group by i order by i limit 10; + +explain vectorization operator +select i, count(*) from over10k group by i 
order by i limit 10; +select i, count(*) from over10k group by i order by i limit 10; \ No newline at end of file diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 89b7169..e6e2177 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1497,6 +1497,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -1594,6 +1596,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -1608,6 +1612,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2284,6 +2290,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -2381,6 +2389,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2395,6 +2405,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: One Long Key IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_partitioned.q.out ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index 799062e..cfbed1b 100644 --- ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1721,10 +1721,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 10:tinyint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: tinyint) @@ -1748,7 +1749,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -2106,6 +2107,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/llap_smb.q.out ql/src/test/results/clientpositive/llap/llap_smb.q.out index 4c0f6a0..44d78f5 100644 --- ql/src/test/results/clientpositive/llap/llap_smb.q.out +++ ql/src/test/results/clientpositive/llap/llap_smb.q.out @@ -146,7 +146,7 @@ PREHOOK: Input: default@orc_a@y=2001/q=7 PREHOOK: Input: default@orc_a@y=2001/q=8 PREHOOK: Input: default@orc_a@y=2001/q=9 PREHOOK: Input: default@orc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_a @@ -171,7 +171,7 @@ POSTHOOK: Input: default@orc_a@y=2001/q=7 POSTHOOK: Input: default@orc_a@y=2001/q=8 POSTHOOK: Input: default@orc_a@y=2001/q=9 POSTHOOK: Input: default@orc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2000 2 6578 2001 8 9438 2000 3 6149 @@ -234,7 +234,7 @@ STAGE PLANS: 0 id (type: bigint) 1 id (type: bigint) outputColumnNames: _col2, 
_col3 - Statistics: Num rows: 987 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 988 Data size: 7904 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col2 (type: int), _col3 (type: smallint) @@ -295,7 +295,7 @@ PREHOOK: Input: default@orc_a@y=2001/q=7 PREHOOK: Input: default@orc_a@y=2001/q=8 PREHOOK: Input: default@orc_a@y=2001/q=9 PREHOOK: Input: default@orc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_a @@ -320,7 +320,7 @@ POSTHOOK: Input: default@orc_a@y=2001/q=7 POSTHOOK: Input: default@orc_a@y=2001/q=8 POSTHOOK: Input: default@orc_a@y=2001/q=9 POSTHOOK: Input: default@orc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2000 2 6578 2001 8 9438 2000 3 6149 diff --git ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index 2c13d5d..b7fdb56 100644 --- ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -73,6 +73,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -252,6 +254,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By 
Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out index 4cd56f8..56151b1 100644 --- ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out @@ -66,11 +66,11 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY @@ -142,11 +142,11 @@ STAGE PLANS: PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -160,11 +160,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH 
### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -178,11 +178,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -245,6 +245,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -330,11 +332,11 @@ STAGE PLANS: PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### 
+#### A masked pattern was here #### 501 501 502 502 503 503 @@ -361,20 +363,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -388,11 +390,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -406,11 +408,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: 
hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -424,11 +426,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -455,20 +457,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 
{"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -482,11 +484,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -500,11 +502,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -518,11 +520,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 diff --git ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out index f4d6218..f902420 100644 --- 
ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out @@ -78,11 +78,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -154,11 +154,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -221,6 +221,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -333,11 +335,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] 
from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2144 2145 2142 2143 2140 2141 @@ -366,11 +368,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -442,11 +444,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -509,6 +511,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -621,11 +625,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2146 2147 2144 2145 2142 2143 @@ -654,11 +658,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -730,11 +734,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], 
listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -797,6 +801,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -909,11 +915,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2148 2149 2146 2147 2144 2145 diff --git ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index a2bb0f3..853f9b6 100644 --- ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -88,11 +88,11 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked 
pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 @@ -167,12 +167,12 @@ PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -237,6 +237,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: string) @@ -350,12 +352,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type 
where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -386,22 +388,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 
{"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -416,12 +418,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -452,22 +454,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### 
A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -482,12 +484,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 diff --git ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out index 7249363..ca265ff 100644 --- ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out @@ -66,11 +66,11 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, 
st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY @@ -142,11 +142,11 @@ STAGE PLANS: PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -160,11 +160,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -178,11 +178,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -245,6 +245,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -330,11 +332,11 @@ STAGE PLANS: PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -361,20 +363,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 
{"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -388,11 +390,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -406,11 +408,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -424,11 +426,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -455,20 +457,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 
SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -482,11 +484,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -500,11 +502,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 
POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -518,11 +520,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index 1e090f0..29f3282 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -286,6 +288,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, 
Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -426,6 +430,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index c99ac8d..5fcc875 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -85,6 +85,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 54216fa..51481e9 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -270,6 +270,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By 
Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 6093beb..8f88e8c 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -166,6 +166,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -370,6 +372,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -764,6 +768,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1111,11 +1117,11 @@ STAGE PLANS: Group By Operator aggregations: count() 
Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1140,7 +1146,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1249,11 +1255,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1278,7 +1284,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1387,11 +1393,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: 
VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1416,7 +1422,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -1525,11 +1531,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1554,7 +1560,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index aabfc73..0aac152 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ 
-175,6 +175,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -365,11 +367,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 10:binary - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bin (type: binary) @@ -394,7 +396,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index 861ae9a..3764516 100644 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -149,6 +149,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS 
true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out index 9a43659..0ded352 100644 --- ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -110,6 +110,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) @@ -306,6 +308,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index e8bb722..30547d3 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -78,6 +78,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group 
By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -303,6 +305,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index f2277c1..988db52 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -940,6 +940,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1157,13 +1159,14 @@ STAGE PLANS: Group By Operator aggregations: count(val) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate 
Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_COLUMN keys: str (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -1188,7 +1191,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1300,13 +1303,14 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 6:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_COLUMN keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -1331,7 +1335,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index 90086ea..eec008a 100644 --- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1265,10 +1265,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By 
Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 16:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ws_order_number (type: int) @@ -1292,7 +1293,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1323,6 +1324,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_count_simple.q.out ql/src/test/results/clientpositive/llap/vector_count_simple.q.out new file mode 100644 index 0000000..bf9fa8d --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_count_simple.q.out @@ -0,0 +1,7011 @@ +PREHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_txt 
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SIMPLE [(groupby_long_1a_txt)groupby_long_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a values (800) +PREHOOK: 
type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SIMPLE [(groupby_long_1a_nonull_txt)groupby_long_1a_nonull_txt.FieldSchema(name:key, 
type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a_nonull values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_txt 
+PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SIMPLE [(groupby_long_1b_txt)groupby_long_1b_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1b values (32030) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (32030) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields 
terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SIMPLE [(groupby_long_1b_nonull_txt)groupby_long_1b_nonull_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b_nonull values (31713) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (31713) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b_nonull values (34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: 
insert into groupby_long_1b_nonull values (34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c.key SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c 
values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt 
+PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: Lineage: groupby_long_1c_nonull.b_string SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c_nonull.key SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +POSTHOOK: type: 
QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) 
+ Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: 
query: select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + 
className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + 
Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: select key, 
count(key) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort 
order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: 
VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: 
MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key 
+PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true 
+ Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, 
count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 4 +PREHOOK: 
query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: key, b_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b_string) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key 
(type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +9999 1 +NULL 3 +PREHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +9999 1 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 
+1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +PREHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@groupby_string_1a_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SIMPLE [(groupby_string_1a_txt)groupby_string_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: 
query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM 
groupby_string_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SIMPLE [(groupby_string_1a_nonull_txt)groupby_string_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b +POSTHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b +POSTHOOK: Lineage: groupby_string_1b.key SIMPLE [(groupby_string_1b_txt)groupby_string_1b_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table 
+POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SIMPLE [(groupby_string_1b_nonull_txt)groupby_string_1b_nonull_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: 
default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1b_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: CREATE 
TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_date SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: 
_dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table 
+POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key 
string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SIMPLE 
[(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] 
+PREHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +PREHOOK: type: 
QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 0 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate 
Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 3 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from 
groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key 
(type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 0 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, 
count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 
1144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 1 +PXLD 
3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern 
was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 
(type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 0 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 
+ Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + 
vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 6 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: 
query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date) + outputColumnNames: key, s_date + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_date) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key 
(type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5402 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 5 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_timestamp (type: timestamp) + outputColumnNames: key, s_timestamp + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_timestamp) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 
+NULL 4 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 
+ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from 
groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: CREATE TABLE 
groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a +POSTHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a +POSTHOOK: Lineage: groupby_serialize_1a.key SIMPLE [(groupby_serialize_1a_txt)groupby_serialize_1a_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated 
by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: Lineage: groupby_serialize_1a_nonull.key SIMPLE [(groupby_serialize_1a_nonull_txt)groupby_serialize_1a_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@groupby_serialize_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b +POSTHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b +POSTHOOK: Lineage: groupby_serialize_1b.c_double SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_smallint SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_string SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.key SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_double SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_smallint SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_string SIMPLE 
[(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.key SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 
22:27:02.000871113 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 
15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: 
true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 
20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 
+2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 
02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_smallint (type: smallint) + outputColumnNames: key, c_smallint + Select Vectorization: + className: VectorSelectOperator 
+ native: true + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_smallint) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + 
Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 
+2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_string) from 
groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_string (type: string) + outputColumnNames: key, c_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_string) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 
15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 
07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull 
group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 
+2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 
05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 
22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select 
s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No 
PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution 
mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s 
limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string), ts (type: timestamp) + outputColumnNames: s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data 
size: 224 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] 
+ +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP 
IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + 
Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + 
className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was 
here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: d (type: double), ts (type: timestamp) + outputColumnNames: d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + 
outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + 
outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(dec) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: 
+ enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: 
decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 
(SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(bin) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: 
[DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + 
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + 
Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + 
outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(i) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator 
+ native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b) + Group By Vectorization: + 
className: VectorGroupByHashLongKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: 
+ className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation 
or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE 
+ Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### 
+POSTHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out index 8dd959e..79f937b 100644 --- ql/src/test/results/clientpositive/llap/vector_data_types.q.out +++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out @@ -381,6 +381,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index 902d137..0df1dc6 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -88,6 +88,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -268,6 +270,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -482,6 +486,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -682,6 +688,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 50e4305..6cc6b2e 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -592,6 +592,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets 
IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -1211,6 +1213,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index c6867f8..ad8020e 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -2304,6 +2304,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -3245,6 +3247,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3409,6 +3413,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int 
native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3655,6 +3661,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3786,6 +3794,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3917,6 +3927,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -6300,6 +6312,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -7247,6 +7261,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -7412,6 +7428,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -7659,6 +7677,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7791,6 +7811,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7923,6 +7945,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out index 73d04a9..562d3e9 100644 --- ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out @@ -144,6 +144,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out index 127d8ad..7b775eb 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out @@ -146,6 +146,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 3bfbda0..4ee1521 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -60,6 +60,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -198,6 +200,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -362,6 +366,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS 
true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -623,6 +629,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -978,6 +986,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -1013,6 +1023,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git 
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index 9a2f5d8..17d61cb 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -231,6 +233,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -390,6 +394,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 
(type: string), _col1 (type: string), 0L (type: bigint) @@ -543,6 +549,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -696,6 +704,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -856,6 +866,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out index 6005fb2..7659a54 100644 --- 
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -75,6 +75,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -277,6 +279,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -489,6 +493,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -603,13 +609,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: 
VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1 @@ -796,6 +803,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -910,13 +919,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1 @@ -1099,6 +1109,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 
1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1427,6 +1439,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1748,6 +1762,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1914,6 +1930,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + 
nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1987,13 +2005,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1 @@ -2132,6 +2151,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index d8e6b3f..30d80e9 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -82,6 +82,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint 
native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -258,6 +260,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out index 3586eae..fe38089 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -90,6 +90,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -252,6 +254,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 
1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -414,6 +418,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -576,6 +582,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -732,6 +740,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS 
true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -884,10 +894,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -913,7 +924,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1024,13 +1035,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -1055,7 +1067,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] 
featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index b072ffc..160598b 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string) @@ -257,6 +259,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string) @@ -661,6 +665,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH 
projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out index 74caa3f..0a169b1 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out @@ -83,6 +83,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -222,6 +224,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -387,6 +391,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out index b896193..bcaa3e1 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -84,6 +84,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -328,6 +330,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -603,6 +607,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out index 8da5735..e530987 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -146,6 +148,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -268,6 +272,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a 
(type: string), b (type: string) @@ -338,6 +344,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -487,6 +495,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -557,6 +567,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out index 6c4ae65..c8a764b 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -76,6 +76,8 @@ STAGE 
PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -219,6 +221,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out index 80e073b..7f02670 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: 
_col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -237,6 +239,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -405,6 +409,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -574,6 +580,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -780,6 +788,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -941,6 +951,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1109,6 +1121,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1271,6 +1285,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1478,6 
+1494,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1634,6 +1652,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1792,6 +1812,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1941,6 +1963,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: 
int), value (type: int), 0L (type: bigint) @@ -2107,6 +2131,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2273,6 +2299,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2434,6 +2462,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out index e67bca7..f6909f8 100644 --- 
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -277,6 +279,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -478,6 +482,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -677,6 +683,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, 
ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -868,10 +876,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -898,7 +907,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1051,13 +1060,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, 
No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -1083,7 +1093,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index dc3363d..c1100ba 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -74,6 +74,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: category (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index 98e6e54..b823929 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -89,6 +89,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, 
Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -104,10 +106,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index d90ebf0..4417056 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -268,10 +268,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -296,7 +297,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + 
allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -467,10 +468,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -494,7 +496,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -763,6 +765,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_item_sk (type: int) @@ -829,6 +833,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: 1 
(type: int) @@ -987,6 +993,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index ef49d90..8f18045 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -327,6 +329,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) 
@@ -673,6 +677,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -708,6 +714,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out index 79ca6d9..54356b5 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out @@ -77,6 +77,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -494,10 +496,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data 
size: 850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -524,7 +527,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -569,6 +572,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -679,10 +684,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS 
true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: 1 (type: int) @@ -709,7 +715,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -760,6 +766,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out index 6c6986e..a83eb4e 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out @@ -55,7 +55,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct] @@ -66,7 +66,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -78,7 +78,7 @@ STAGE PLANS: keys: key (type: string) mode: final outputColumnNames: _col0 - 
Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) Group By Vectorization: @@ -86,11 +86,13 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -99,7 +101,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs @@ -147,13 +149,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index ec3e2b8..f6262c9 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -170,6 +170,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: s_store_id (type: string), 0L (type: bigint) @@ -301,6 +303,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index bb555df..2f990b7 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -313,10 +313,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 
4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -342,7 +343,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 5fb8258..ccc7132 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -128,6 +128,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -278,6 +280,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate 
Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -499,6 +503,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -710,6 +716,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 17704e5..4f8f1e0 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -3389,9 +3389,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3405,7 +3406,7 @@ STAGE PLANS: 
inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3502,9 +3503,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3518,7 +3520,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3617,9 +3619,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3633,7 +3636,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3730,6 +3733,8 @@ STAGE PLANS: className: 
VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3848,6 +3853,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3928,9 +3935,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3944,7 +3952,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -4044,6 +4052,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez 
IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4151,9 +4161,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4167,7 +4178,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -4290,9 +4301,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4306,7 +4318,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true 
Reducer 2 @@ -4416,9 +4428,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4432,7 +4445,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4552,9 +4565,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4568,7 +4582,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4678,6 +4692,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or 
Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator @@ -4796,9 +4812,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4812,7 +4829,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -4826,9 +4843,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4842,7 +4860,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4962,9 +4980,10 @@ STAGE PLANS: 
className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4978,7 +4997,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5105,9 +5124,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5121,7 +5141,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5233,9 +5253,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5249,7 +5270,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -5380,9 +5401,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5396,7 +5418,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -5529,9 +5551,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce 
Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5545,7 +5568,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -5705,9 +5728,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5721,7 +5745,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -5869,9 +5893,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkStringOperator @@ -5885,7 +5910,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: 
false vectorized: true @@ -6009,10 +6034,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6038,7 +6064,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6211,10 +6237,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6240,7 +6267,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6415,10 +6442,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column 
stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6444,7 +6472,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6623,6 +6651,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 0:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) @@ -6830,6 +6860,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -6966,10 +6998,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - 
className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6995,7 +7028,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7177,6 +7210,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -7377,10 +7412,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -7406,7 +7442,7 @@ STAGE PLANS: 
inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7637,10 +7673,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -7666,7 +7703,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7852,10 +7889,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -7882,7 +7920,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - 
allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8109,10 +8147,11 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8138,7 +8177,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8325,6 +8364,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -8534,10 +8575,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8563,7 +8605,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8597,10 +8639,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8626,7 +8669,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8834,10 +8877,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8863,7 +8907,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9064,10 +9108,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9093,7 +9138,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9278,10 +9323,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9307,7 +9353,7 @@ 
STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9528,10 +9574,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9557,7 +9604,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9780,10 +9827,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9809,7 +9857,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10090,10 +10138,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10119,7 +10168,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10366,10 +10415,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -10395,7 +10445,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: 
false vectorized: true rowBatchContext: @@ -10529,10 +10579,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10558,7 +10609,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10732,10 +10783,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10761,7 +10813,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10937,10 +10989,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 
Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10966,7 +11019,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11146,6 +11199,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 0:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) @@ -11354,6 +11409,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -11490,10 +11547,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By 
Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -11519,7 +11577,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11702,6 +11760,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -11903,10 +11963,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: 
int) @@ -11932,7 +11993,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12165,10 +12226,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -12194,7 +12256,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12381,10 +12443,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -12411,7 +12474,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12638,10 +12701,11 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -12667,7 +12731,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12855,6 +12919,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -13064,10 +13130,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13093,7 +13160,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13127,10 +13194,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13156,7 +13224,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13364,10 +13432,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single 
Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13393,7 +13462,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13594,10 +13663,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13623,7 +13693,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13808,10 +13878,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping 
Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13837,7 +13908,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14058,10 +14129,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -14087,7 +14159,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14310,10 +14382,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -14339,7 +14412,7 @@ 
STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14622,10 +14695,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -14651,7 +14725,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14899,10 +14973,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -14928,7 +15003,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15062,10 +15137,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -15091,7 +15167,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15265,10 +15341,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -15294,7 +15371,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false 
vectorized: true rowBatchContext: @@ -15470,10 +15547,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -15499,7 +15577,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15679,6 +15757,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 0:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) @@ -15887,6 +15967,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -16023,10 
+16105,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16052,7 +16135,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16235,6 +16318,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -16436,10 +16521,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode 
HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16465,7 +16551,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16698,10 +16784,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16727,7 +16814,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16914,10 +17001,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: 
int) @@ -16944,7 +17032,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17171,10 +17259,11 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17200,7 +17289,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17388,6 +17477,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -17597,10 +17688,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH 
keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17626,7 +17718,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17660,10 +17752,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17689,7 +17782,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17897,10 +17990,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17926,7 +18020,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18127,10 +18221,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18156,7 +18251,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18341,10 +18436,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single 
Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18370,7 +18466,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18591,10 +18687,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18620,7 +18717,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18843,10 +18940,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping 
Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18872,7 +18970,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -19155,10 +19253,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -19184,7 +19283,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -19432,10 +19531,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -19461,7 
+19561,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 37821fb..77ffad6 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -42,10 +42,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -70,7 +71,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -141,10 +142,11 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -169,7 +171,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -314,10 +316,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -342,7 +345,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -419,6 +422,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 17:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) diff --git 
ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index 7a2cd54..a724aab 100644 --- ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -272,6 +274,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out index 0b645ab..eec6639 100644 --- ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out @@ -1064,11 +1064,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 9:double - native: false + native: true + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: double) @@ -1093,7 +1093,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1754,6 +1754,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 12:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: boolean) @@ -2202,11 +2204,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 11:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 10:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2231,7 +2233,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true 
usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2341,11 +2343,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2370,7 +2372,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2480,11 +2482,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 12:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 11:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2509,7 +2511,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2720,11 +2722,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) 
Group By Vectorization: - aggregators: VectorUDAFCount(col 13:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 12:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2749,7 +2751,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out index e4bc4f0..a60f308 100644 --- ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out @@ -147,6 +147,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 4901e83..87c1e97 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -745,6 +745,8 @@ STAGE PLANS: className: VectorGroupByOperator 
groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index a841d4c..0775e3d 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -320,6 +320,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index b1209d9..7c3703e 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -287,6 +287,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash 
@@ -506,6 +508,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) @@ -1587,6 +1591,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1806,6 +1812,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out index 068453f..9444cfe 100644 --- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out @@ -116,6 +116,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -286,6 +288,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -456,6 +460,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -613,6 +619,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -770,6 +778,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode 
HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 687b4af..4aaaf38 100644 --- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -449,11 +449,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -478,7 +478,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1380,11 +1380,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or 
Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -1409,7 +1409,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2335,11 +2335,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:timestamp - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_time (type: timestamp) @@ -2364,7 +2364,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2874,11 +2874,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] 
keys: fl_date (type: date) @@ -2903,7 +2903,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3805,11 +3805,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -3834,7 +3834,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4760,11 +4760,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:timestamp - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_time (type: timestamp) @@ -4789,7 +4789,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: 
[] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out index 568549d..b4b0241 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: name (type: string), age (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out index 3f92327..b94dfa9 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out @@ -67,6 +67,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) diff --git 
ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out index bd42ed2..bb5f2cf 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out @@ -104,6 +104,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out index 8fb0752..98208e4 100644 --- ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out @@ -119,6 +119,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -326,6 +328,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index 5b43765..5a745e0 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -354,10 +354,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 20:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -382,7 +383,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 9859824..7a0c3d7 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -2791,6 +2791,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH 
IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -2934,6 +2936,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index de30ca7..7d97aa6 100644 --- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -57,11 +57,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 7:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -86,7 +86,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git 
ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index c713303..5222385 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -234,6 +234,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 1:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) @@ -442,6 +444,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 1:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) @@ -3811,6 +3815,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, col 2:string, col 5:int, col 7:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) @@ -4501,6 +4507,8 @@ STAGE 
PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) @@ -6055,6 +6063,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 1:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out index 8dcb900..c58fbba 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -50,6 +50,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string) @@ -293,6 +295,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: int) @@ -530,6 +534,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out index b2db5a5..cfb45f1 100644 --- ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -54,6 +54,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -235,6 +237,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -566,6 +570,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -747,6 +753,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1078,6 +1086,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1259,6 +1269,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1636,6 +1648,8 @@ STAGE PLANS: className: 
VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_1.q.out ql/src/test/results/clientpositive/llap/vectorization_1.q.out index c87926c..2459461 100644 --- ql/src/test/results/clientpositive/llap/vectorization_1.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_1.q.out @@ -87,6 +87,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_12.q.out ql/src/test/results/clientpositive/llap/vectorization_12.q.out index 0ead6c4..278a146 100644 --- ql/src/test/results/clientpositive/llap/vectorization_12.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_12.q.out @@ -111,6 +111,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH 
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out index d72c298..23ffd42 100644 --- ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -113,6 +113,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -467,6 +469,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out index 7ae99a3..2899f14 100644 --- ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ 
ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -113,6 +113,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 31363df..150d76c 100644 --- ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -109,6 +109,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out index 59f2d10..d1e3ee8 100644 --- ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ 
ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorization_2.q.out ql/src/test/results/clientpositive/llap/vectorization_2.q.out index 83833da..45a4452 100644 --- ql/src/test/results/clientpositive/llap/vectorization_2.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_2.q.out @@ -91,6 +91,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_3.q.out ql/src/test/results/clientpositive/llap/vectorization_3.q.out index 3c502cd..eaf26f3 100644 --- ql/src/test/results/clientpositive/llap/vectorization_3.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_3.q.out @@ -96,6 +96,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_4.q.out ql/src/test/results/clientpositive/llap/vectorization_4.q.out index a8cfa48..5ba9b03 100644 --- ql/src/test/results/clientpositive/llap/vectorization_4.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_4.q.out @@ -91,6 +91,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_5.q.out ql/src/test/results/clientpositive/llap/vectorization_5.q.out index 5124740..a3ea0ad 100644 --- ql/src/test/results/clientpositive/llap/vectorization_5.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_5.q.out @@ -84,6 +84,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out index 59f2d10..d1e3ee8 100644 --- ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ 
ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 7be4d7d..7018996 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -291,6 +291,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -491,10 +493,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS 
true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -522,7 +525,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -655,6 +658,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint), cdouble (type: double) @@ -892,6 +897,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out index e6427fa..4e34429 100644 --- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out @@ -44,6 +44,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out index 80c7c0c..ec2f54f 100644 --- ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out @@ -70,15 +70,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_part - Statistics: Num rows: 200 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: (cdouble + 2.0D) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -103,13 +103,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE 
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 3a5c272..cd13d41 100644 --- ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -118,6 +118,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -380,6 +382,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] mode: hash @@ -634,6 +638,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH 
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash @@ -867,6 +873,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash @@ -2202,6 +2210,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:smallint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col0 (type: smallint) @@ -2479,6 +2489,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: _col0 (type: double) @@ -2800,6 +2812,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:timestamp, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH 
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: timestamp), _col1 (type: string) @@ -3202,6 +3216,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: boolean) @@ -3440,6 +3456,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3555,6 +3573,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3742,6 +3762,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS 
false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3857,6 +3879,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3972,6 +3996,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4087,6 +4113,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4202,6 +4230,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4317,6 +4347,8 @@ STAGE PLANS: className: 
VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index aec161d..93fd3b6 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -306,6 +306,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -450,6 +452,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out index 8351192..d7dbb25 100644 --- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out @@ -1262,6 +1262,8 @@ STAGE 
PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index f19e2ca..2f1ca8c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -73,6 +73,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -200,10 +202,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -229,7 +232,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] 
featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -282,6 +285,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 15b62c9..9b26a8b 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -85,7 +85,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -308,7 +308,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -625,7 +625,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 6 @@ -669,7 +669,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + 
allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1068,7 +1068,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1380,7 +1380,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1690,7 +1690,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1848,7 +1848,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2305,7 +2305,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2440,7 +2440,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2785,7 +2785,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2943,7 +2943,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 
- allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3055,7 +3055,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -3219,7 +3219,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3361,7 +3361,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 6 @@ -3405,7 +3405,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4250,7 +4250,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 10 @@ -4308,7 +4308,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 7 @@ -4644,7 +4644,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4820,7 +4820,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -4864,7 +4864,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5045,7 +5045,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5208,7 +5208,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5360,7 +5360,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5512,7 +5512,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5628,7 +5628,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5826,7 +5826,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6204,7 +6204,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] 
featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -6248,7 +6248,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6536,7 +6536,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index 228bd9d..b67d547 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -137,6 +137,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -371,6 +373,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH 
projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -605,6 +609,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -840,6 +846,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -911,6 +919,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -1177,6 +1187,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -1205,6 +1217,8 @@ STAGE PLANS: className: VectorGroupByOperator 
groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -1468,6 +1482,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 8abd234..0815ef6 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -75,6 +75,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index f05e5c0..c1ab83a 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -206,6 +206,8 @@ STAGE 
PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -409,6 +411,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -612,6 +616,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index acb9126..ad11a9b 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -298,6 +298,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS 
true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: _col0 (type: tinyint) diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index ccf9aae..5a0d8e5 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -3542,6 +3542,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 46a2470..c8df117 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -371,6 +373,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -512,6 +516,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index c9dd434..9dd16d7 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -806,6 +806,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -933,6 +935,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: 
hash @@ -1078,6 +1082,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/orc_struct_type_vectorization.q.out ql/src/test/results/clientpositive/orc_struct_type_vectorization.q.out index c67e8d1..be5e87e 100644 --- ql/src/test/results/clientpositive/orc_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/orc_struct_type_vectorization.q.out @@ -235,6 +235,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out index 03488a1..ac985ee 100644 --- ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out +++ ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out @@ -211,6 +211,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or 
Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -483,6 +485,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -755,6 +759,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out index 289909d..a6cbd47 100644 --- ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out @@ -227,6 +227,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/parquet_struct_type_vectorization.q.out 
ql/src/test/results/clientpositive/parquet_struct_type_vectorization.q.out index ed9bb09..8822bf8 100644 --- ql/src/test/results/clientpositive/parquet_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/parquet_struct_type_vectorization.q.out @@ -235,6 +235,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/parquet_vectorization_0.q.out index fbb78b1..8bfe2e5 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_0.q.out @@ -48,6 +48,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -214,6 +216,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -536,6 +540,8 @@ STAGE 
PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -702,6 +708,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1024,6 +1032,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1190,6 +1200,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1558,6 +1570,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/parquet_vectorization_1.q.out index afada38..b77bfd9 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_1.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_1.q.out @@ -81,6 +81,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/parquet_vectorization_12.q.out index c284977..8f20fee 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_12.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_12.q.out @@ -105,6 +105,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: 
double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/parquet_vectorization_13.q.out index 6dd6e3f..d15fc1f 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_13.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_13.q.out @@ -107,6 +107,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -437,6 +439,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/parquet_vectorization_14.q.out index c501fab..b0ebe5b 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_14.q.out +++ 
ql/src/test/results/clientpositive/parquet_vectorization_14.q.out @@ -107,6 +107,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/parquet_vectorization_15.q.out index 39057d6..cc331cb 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_15.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_15.q.out @@ -103,6 +103,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/parquet_vectorization_16.q.out index cf06c91..e18ed22 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_16.q.out +++ 
ql/src/test/results/clientpositive/parquet_vectorization_16.q.out @@ -80,6 +80,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/parquet_vectorization_2.q.out index 131797d..7384c01 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_2.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_2.q.out @@ -85,6 +85,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/parquet_vectorization_3.q.out index f98dea6..f77d1bc 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_3.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_3.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/parquet_vectorization_4.q.out index 973e2bd..8826480 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_4.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_4.q.out @@ -85,6 +85,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_5.q.out ql/src/test/results/clientpositive/parquet_vectorization_5.q.out index e20dcbf..02054ff 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_5.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_5.q.out @@ -78,6 +78,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/parquet_vectorization_9.q.out index cf06c91..e18ed22 100644 --- 
ql/src/test/results/clientpositive/parquet_vectorization_9.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_9.q.out @@ -80,6 +80,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out index 8a81b34..376ef9d 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out @@ -229,6 +229,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -361,6 +363,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -616,6 +620,8 @@ 
STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index 1916d25..6543e70 100644 --- ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -73,10 +73,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ds (type: string) @@ -101,7 +102,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -312,10 +313,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: 
VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -332,7 +334,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -797,10 +799,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 3:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -817,7 +820,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1277,10 +1280,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: 
col 6:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: bigint) @@ -1297,7 +1301,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1559,10 +1563,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 4:decimal(10,0) - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: decimal(10,0)) @@ -1579,7 +1584,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1835,10 +1840,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -1855,7 +1861,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1898,10 +1904,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -1918,7 +1925,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -2537,10 +2544,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS 
true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -2560,10 +2568,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -2580,7 +2589,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -3043,10 +3052,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -3063,7 +3073,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -3528,10 +3538,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 3:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -3548,7 +3559,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -3802,10 +3813,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -3822,7 +3834,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true 
usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -4494,10 +4506,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 4:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -4514,7 +4527,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -4805,10 +4818,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 2008-04-08) -> 5:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: '2008-04-08' (type: string) @@ -4833,7 +4847,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: 
true rowBatchContext: @@ -5237,10 +5251,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -5260,10 +5275,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -5280,7 +5296,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -5531,10 +5547,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true 
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -5551,7 +5568,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -5784,10 +5801,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -5804,7 +5822,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6036,10 +6054,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -6056,7 +6075,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6290,10 +6309,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -6310,7 +6330,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6353,10 +6373,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction 
IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -6373,7 +6394,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6967,6 +6988,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7022,6 +7045,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7091,10 +7116,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] 
keys: _col0 (type: string) @@ -7111,10 +7137,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7161,10 +7188,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7181,10 +7209,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7272,6 +7301,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7327,6 +7358,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7457,10 +7490,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7515,10 +7549,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator 
groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7604,6 +7639,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7659,6 +7696,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7728,10 +7767,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: 
HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7748,10 +7788,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7798,10 +7839,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7818,10 +7860,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS 
true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7909,6 +7952,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7964,6 +8009,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8096,10 +8143,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8154,10 +8202,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: 
VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8244,6 +8293,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8299,6 +8350,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8368,10 +8421,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS 
true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8388,10 +8442,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8438,10 +8493,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8458,10 +8514,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, 
Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8492,10 +8549,11 @@ STAGE PLANS: vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ds (type: string) @@ -8520,7 +8578,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8553,6 +8611,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8608,6 +8668,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT 
aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8738,10 +8800,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8796,10 +8859,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8903,10 +8967,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, 
Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8923,7 +8988,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8979,6 +9044,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -9146,10 +9213,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 3:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9166,7 +9234,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9223,6 +9291,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -9382,10 +9452,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -9402,7 +9473,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9454,10 +9525,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -9474,7 
+9546,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9547,6 +9619,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -9717,10 +9791,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -9740,10 +9815,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping 
Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -9760,7 +9836,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9815,6 +9891,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -9981,10 +10059,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -10001,7 +10080,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10057,6 +10136,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS 
true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -10215,10 +10296,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 3:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -10235,7 +10317,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10292,6 +10374,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -10449,10 +10533,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:double - native: false + native: 
true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -10469,7 +10554,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10526,6 +10611,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -10671,10 +10758,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 2008-04-08) -> 5:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: '2008-04-08' (type: string) @@ -10699,7 +10787,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10794,6 +10882,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -10955,10 +11045,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -10975,7 +11066,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11031,6 +11122,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH 
projectedOutputColumnNums: [0] mode: hash @@ -11222,6 +11315,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -11412,6 +11507,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -11551,10 +11648,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -11571,7 +11669,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11623,10 +11721,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data 
size: 172 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -11643,7 +11742,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11716,6 +11815,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -11999,6 +12100,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -12136,6 +12239,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS 
true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -12191,6 +12296,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -12260,10 +12367,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12280,10 +12388,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate 
Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12330,10 +12439,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12350,10 +12460,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12441,6 +12552,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: 
[0] mode: hash @@ -12496,6 +12609,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -12628,10 +12743,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12686,10 +12802,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out 
ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 9f5fa2a..7c2441a 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -163,6 +163,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -363,6 +365,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -749,6 +753,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1094,11 +1100,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1122,7 +1128,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1230,11 +1236,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1258,7 +1264,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1366,11 +1372,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, 
Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1394,7 +1400,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -1502,11 +1508,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1530,7 +1536,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 83d5a62..eff91d1 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -148,6 +148,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index 1444cd8..a2934ef 100644 --- ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1264,10 +1264,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 16:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ws_order_number (type: int) @@ -1290,7 +1291,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1321,6 +1322,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_data_types.q.out 
ql/src/test/results/clientpositive/spark/vector_data_types.q.out index 310a23a..a3cd0c2 100644 --- ql/src/test/results/clientpositive/spark/vector_data_types.q.out +++ ql/src/test/results/clientpositive/spark/vector_data_types.q.out @@ -374,6 +374,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index d37a27e..daf08be 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -87,6 +87,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -265,6 +267,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12] keys: _col0 (type: int) @@ -477,6 +481,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -674,6 +680,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out index 0236980..4464d81 100644 --- ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out @@ -143,6 +143,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out index 
a68002e..76c4f42 100644 --- ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out @@ -145,6 +145,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index 168aa77..b0bcca0 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -245,10 +245,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -269,7 +270,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out 
ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index ff1af2c..5fe324e 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -89,10 +89,11 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -113,7 +114,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: @@ -141,10 +142,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -167,7 +169,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] 
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -367,6 +369,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 17:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) @@ -415,10 +419,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -441,7 +446,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index e6546c5..250560c 100644 --- ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -146,6 +146,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false 
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index 9a1fa53..8fa1e2f 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -832,6 +832,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index 32bcc9b..a122184 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -406,6 +406,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git 
ql/src/test/results/clientpositive/spark/vector_string_concat.q.out ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index 30dbaf1..bb6a956 100644 --- ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -352,10 +352,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 20:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -379,7 +380,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index c3201bf..170a956 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -53,6 +53,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH 
projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -232,6 +234,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -559,6 +563,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -738,6 +744,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1065,6 +1073,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1244,6 +1254,8 @@ STAGE PLANS: className: VectorGroupByOperator 
groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1617,6 +1629,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_1.q.out ql/src/test/results/clientpositive/spark/vectorization_1.q.out index 71625e0..3fc7f3e 100644 --- ql/src/test/results/clientpositive/spark/vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_1.q.out @@ -86,6 +86,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_12.q.out ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 24cfa4e..8e88ae3 100644 --- ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -110,6 +110,8 @@ 
STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index c2a8006..3a0c3b7 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -464,6 +466,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS 
false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 95bf29b..aa679ef 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index d0b03b3..4c11185 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -108,6 +108,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: 
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index 8798ebe..baa24a7 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_2.q.out ql/src/test/results/clientpositive/spark/vectorization_2.q.out index 99afc2b..7c943b6 100644 --- ql/src/test/results/clientpositive/spark/vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_2.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_3.q.out ql/src/test/results/clientpositive/spark/vectorization_3.q.out index 
2bccf64..78c9092 100644 --- ql/src/test/results/clientpositive/spark/vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_3.q.out @@ -95,6 +95,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_4.q.out ql/src/test/results/clientpositive/spark/vectorization_4.q.out index 922eb90..9c1c8e7 100644 --- ql/src/test/results/clientpositive/spark/vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_4.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_5.q.out ql/src/test/results/clientpositive/spark/vectorization_5.q.out index 4cf4548..9d80010 100644 --- ql/src/test/results/clientpositive/spark/vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_5.q.out @@ -83,6 +83,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, 
No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index 8798ebe..baa24a7 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out index c46fc03..ffaef58 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -43,6 +43,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out 
ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out index d58a989..794e051 100644 --- ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out @@ -321,7 +321,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 6215906..9d709b6 100644 --- ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -117,6 +117,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -377,6 +379,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] mode: hash @@ -629,6 +633,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash @@ -860,6 +866,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash @@ -2185,6 +2193,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:smallint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col0 (type: smallint) @@ -2460,6 +2470,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: _col0 (type: double) @@ -2779,6 +2791,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:timestamp, col 6:string native: 
false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: timestamp), _col1 (type: string) @@ -3179,6 +3193,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: boolean) @@ -3415,6 +3431,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3528,6 +3546,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3713,6 +3733,8 @@ STAGE PLANS: className: 
VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3826,6 +3848,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3939,6 +3963,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4052,6 +4078,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4165,6 +4193,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS 
true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4278,6 +4308,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 58e295d..735eded 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -303,6 +303,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -445,6 +447,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash 
diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 5104c80..f0e0e88 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -120,6 +120,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index edc8f74..59bd302 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -3494,6 +3494,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index 68b89a7..7366d05 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -797,6 +797,8 @@ STAGE PLANS: className: 
VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -922,6 +924,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1065,6 +1069,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/vector_aggregate_9.q.out ql/src/test/results/clientpositive/vector_aggregate_9.q.out index 0f7fcc1..468bbb1 100644 --- ql/src/test/results/clientpositive/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_9.q.out @@ -140,6 +140,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, 
Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -246,6 +248,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -352,6 +356,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out index 4d2b0dc..18ec1fd 100644 --- ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out @@ -79,6 +79,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out 
ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index 3f9e90b..263e0e1 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -193,6 +193,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -343,6 +345,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:binary native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bin (type: binary) diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out index 3d3d761..b6bdb5a 100644 --- ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -143,6 +143,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git 
ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out index b38cbe7..5faf92e 100644 --- ql/src/test/results/clientpositive/vector_char_2.q.out +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -104,6 +104,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) @@ -292,6 +294,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out index 48d38c3..ad2a824 100644 --- ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -267,6 +269,8 @@ 
STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_count_simple.q.out ql/src/test/results/clientpositive/vector_count_simple.q.out new file mode 100644 index 0000000..2a81462 --- /dev/null +++ ql/src/test/results/clientpositive/vector_count_simple.q.out @@ -0,0 +1,6606 @@ +PREHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +POSTHOOK: type: 
CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SIMPLE [(groupby_long_1a_txt)groupby_long_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL 
INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SIMPLE [(groupby_long_1a_nonull_txt)groupby_long_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a_nonull values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: 
default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SIMPLE [(groupby_long_1b_txt)groupby_long_1b_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: 
Lineage: groupby_long_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1b values (32030) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (32030) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_nonull_txt 
+PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SIMPLE [(groupby_long_1b_nonull_txt)groupby_long_1b_nonull_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b_nonull values (31713) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (31713) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b_nonull values (34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +PREHOOK: type: LOAD 
+#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c.key SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string 
SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: 
default@groupby_long_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: Lineage: groupby_long_1c_nonull.b_string SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c_nonull.key SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: 
COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization 
operator +select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: 
vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group 
by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from 
groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY 
+PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + 
Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull group by 
key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: 
hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1c group by key 
+PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode 
HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: key, b_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b_string) + Group By 
Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +9999 1 +NULL 3 +PREHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +9999 1 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +PREHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SIMPLE [(groupby_string_1a_txt)groupby_string_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: 
_dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: 
query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SIMPLE [(groupby_string_1a_nonull_txt)groupby_string_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@groupby_string_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b +POSTHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b +POSTHOOK: Lineage: groupby_string_1b.key SIMPLE [(groupby_string_1b_txt)groupby_string_1b_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values 
('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SIMPLE [(groupby_string_1b_nonull_txt)groupby_string_1b_nonull_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +PREHOOK: 
type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1b_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_txt +PREHOOK: Output: 
database:default +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_date SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into 
groupby_string_1c values (NULL, '2018-04-11', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', 
'1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: 
groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_date, 
type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: 
groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: 
query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 0 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### 
+POSTHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + 
native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 3 
+PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: 
_col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 0 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1b group by key +PREHOOK: type: 
QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: 
string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from 
groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 0 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: 
string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 6 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, 
count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date) + outputColumnNames: key, s_date + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_date) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 
(type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 5 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A 
masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_timestamp (type: timestamp) + outputColumnNames: key, s_timestamp + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_timestamp) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: 
QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 4 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A 
masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, 
count(s_date) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, 
count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a +POSTHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a +POSTHOOK: Lineage: groupby_serialize_1a.key SIMPLE 
[(groupby_serialize_1a_txt)groupby_serialize_1a_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: Lineage: groupby_serialize_1a_nonull.key SIMPLE [(groupby_serialize_1a_nonull_txt)groupby_serialize_1a_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint 
smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b +POSTHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b +POSTHOOK: Lineage: groupby_serialize_1b.c_double SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_smallint SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_string SIMPLE 
[(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.key SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@groupby_serialize_1b_nonull +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_double SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_smallint SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_string SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.key SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or 
Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + 
Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, 
count(*) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + 
className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: 
KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: 
select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + 
Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 
+2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_smallint (type: smallint) + outputColumnNames: key, c_smallint + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_smallint) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 
10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: 
explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_string (type: string) + outputColumnNames: key, c_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_string) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe 
for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 
+2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 
20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from 
groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 
+2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 
05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 
05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 
23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + 
Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string), ts (type: timestamp) + outputColumnNames: s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + 
featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false 
+ vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator 
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select 
Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true 
+ Select Operator + expressions: d (type: double), ts (type: timestamp) + outputColumnNames: d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: 
bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column 
stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: 
bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 
Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(dec) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin 
(type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(bin) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 
Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 
Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + 
mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(i) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map 
Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, 
count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 diff --git ql/src/test/results/clientpositive/vector_data_types.q.out ql/src/test/results/clientpositive/vector_data_types.q.out index 688e6a6..9da877c 100644 --- ql/src/test/results/clientpositive/vector_data_types.q.out +++ ql/src/test/results/clientpositive/vector_data_types.q.out @@ -345,6 +345,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index 16c80f0..4faa4d2 100644 --- ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -82,6 +82,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -224,6 +226,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -399,6 +403,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -560,6 +566,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/vector_decimal_precision.q.out ql/src/test/results/clientpositive/vector_decimal_precision.q.out index fd6d9c3..5dbc945 100644 --- ql/src/test/results/clientpositive/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/vector_decimal_precision.q.out @@ -586,6 +586,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] 
mode: hash @@ -1171,6 +1173,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_distinct_2.q.out ql/src/test/results/clientpositive/vector_distinct_2.q.out index c3d2d89..ea39eff 100644 --- ql/src/test/results/clientpositive/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/vector_distinct_2.q.out @@ -138,6 +138,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out index 6b2c7fe..46b0f84 100644 --- ql/src/test/results/clientpositive/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -43,6 +43,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] 
keys: cint (type: int) @@ -186,6 +188,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -337,6 +341,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -488,6 +494,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) diff --git ql/src/test/results/clientpositive/vector_groupby_3.q.out ql/src/test/results/clientpositive/vector_groupby_3.q.out index dfac04d..34c24da 100644 --- ql/src/test/results/clientpositive/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/vector_groupby_3.q.out @@ -140,6 +140,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 01c5096..cfcc78b 100644 --- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -53,6 +53,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -336,6 +338,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index 8a6135e..5c88d87 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -266,6 +266,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS 
true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -458,6 +460,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -734,6 +738,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_item_sk (type: int) @@ -932,6 +938,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out index e89b6bc..a1b3201 100644 --- ql/src/test/results/clientpositive/vector_grouping_sets.q.out +++ 
ql/src/test/results/clientpositive/vector_grouping_sets.q.out @@ -164,6 +164,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: s_store_id (type: string), 0L (type: bigint) @@ -275,6 +277,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/vector_include_no_sel.q.out ql/src/test/results/clientpositive/vector_include_no_sel.q.out index 848823f..921cba0 100644 --- ql/src/test/results/clientpositive/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/vector_include_no_sel.q.out @@ -241,6 +241,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_orderby_5.q.out 
ql/src/test/results/clientpositive/vector_orderby_5.q.out index 793d99e..f8698bb 100644 --- ql/src/test/results/clientpositive/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/vector_orderby_5.q.out @@ -141,6 +141,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out index a6d87c2..27b022c 100644 --- ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -703,6 +703,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out index 77a5bc7..f0006e1 100644 --- ql/src/test/results/clientpositive/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_outer_join2.q.out @@ -343,6 +343,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_outer_join3.q.out ql/src/test/results/clientpositive/vector_outer_join3.q.out index 20f8f4b..f6563b4 100644 --- ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -244,7 +244,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 
(type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS 
true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value 
expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT 
STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 
7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key 
Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Single Key Column IS false","Single COUNT aggregation or Duplicate Reduction IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS 
true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -284,7 +284,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT 
STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 
6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS 
true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 
7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: 
NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select 
Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> 
bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Single Key Column IS false","Single COUNT aggregation or Duplicate Reduction IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local 
Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -324,7 +324,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select 
Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: 
string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS 
true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 
7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: 
NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: 
int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized 
Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Single Key Column IS false","Single COUNT aggregation or Duplicate Reduction IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS 
true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index 51ed3a2..3e587af 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -782,7 
+782,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 
Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 
0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS 
true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch 
Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select 
Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Single Key Column IS false","Single COUNT aggregation or Duplicate Reduction IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 
2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out index 7454c4b..f392b9e 100644 --- ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out +++ ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out @@ -98,6 +98,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -243,6 +245,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out index b46501e..642d3b2 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out @@ -61,6 +61,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out index 8784836..c39f561 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out @@ -98,6 +98,8 @@ 
STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/vector_string_concat.q.out ql/src/test/results/clientpositive/vector_string_concat.q.out index bede8a1..1eed7f5 100644 --- ql/src/test/results/clientpositive/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/vector_string_concat.q.out @@ -348,6 +348,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 20:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out index 13fb6d1..17eb3bb 100644 --- ql/src/test/results/clientpositive/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/vector_when_case_null.q.out @@ -56,6 +56,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: 
_col0 (type: string) diff --git ql/src/test/results/clientpositive/vectorization_1.q.out ql/src/test/results/clientpositive/vectorization_1.q.out index bb8e483..4bde6ff 100644 --- ql/src/test/results/clientpositive/vectorization_1.q.out +++ ql/src/test/results/clientpositive/vectorization_1.q.out @@ -81,6 +81,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_12.q.out ql/src/test/results/clientpositive/vectorization_12.q.out index e129730..ada86cd 100644 --- ql/src/test/results/clientpositive/vectorization_12.q.out +++ ql/src/test/results/clientpositive/vectorization_12.q.out @@ -105,6 +105,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/vectorization_13.q.out ql/src/test/results/clientpositive/vectorization_13.q.out index 96eda74..03fc9e3 100644 --- ql/src/test/results/clientpositive/vectorization_13.q.out +++ ql/src/test/results/clientpositive/vectorization_13.q.out @@ -107,6 +107,8 @@ STAGE PLANS: groupByMode: 
HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -437,6 +439,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out index 7a7a817..9c65a7d 100644 --- ql/src/test/results/clientpositive/vectorization_14.q.out +++ ql/src/test/results/clientpositive/vectorization_14.q.out @@ -107,6 +107,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out index dbef3e7..16586b7 100644 --- ql/src/test/results/clientpositive/vectorization_15.q.out +++ ql/src/test/results/clientpositive/vectorization_15.q.out @@ -103,6 +103,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/vectorization_16.q.out ql/src/test/results/clientpositive/vectorization_16.q.out index 571eae0..32a3545 100644 --- ql/src/test/results/clientpositive/vectorization_16.q.out +++ ql/src/test/results/clientpositive/vectorization_16.q.out @@ -80,6 +80,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 
(type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/vectorization_2.q.out ql/src/test/results/clientpositive/vectorization_2.q.out index e3d6ad0..0dbd04d 100644 --- ql/src/test/results/clientpositive/vectorization_2.q.out +++ ql/src/test/results/clientpositive/vectorization_2.q.out @@ -85,6 +85,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_3.q.out ql/src/test/results/clientpositive/vectorization_3.q.out index bb6c014..9a0f112 100644 --- ql/src/test/results/clientpositive/vectorization_3.q.out +++ ql/src/test/results/clientpositive/vectorization_3.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_4.q.out ql/src/test/results/clientpositive/vectorization_4.q.out index 395431c..0107dad 100644 --- ql/src/test/results/clientpositive/vectorization_4.q.out +++ ql/src/test/results/clientpositive/vectorization_4.q.out @@ -85,6 +85,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_5.q.out ql/src/test/results/clientpositive/vectorization_5.q.out index dfe9715..ed4eeff 100644 --- ql/src/test/results/clientpositive/vectorization_5.q.out +++ ql/src/test/results/clientpositive/vectorization_5.q.out @@ -78,6 +78,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_9.q.out ql/src/test/results/clientpositive/vectorization_9.q.out index 571eae0..32a3545 100644 --- ql/src/test/results/clientpositive/vectorization_9.q.out +++ ql/src/test/results/clientpositive/vectorization_9.q.out @@ -80,6 +80,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git 
ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index 7474547..1d89b83 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -245,6 +245,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -425,6 +427,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -729,6 +733,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/vectorization_nested_udf.q.out index 2c4fa69..324576a 100644 --- ql/src/test/results/clientpositive/vectorization_nested_udf.q.out +++ 
ql/src/test/results/clientpositive/vectorization_nested_udf.q.out @@ -38,6 +38,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 31dcd37..dc8b84e 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -292,6 +292,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -407,6 +409,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index 50c3448..a5e2906 100644 --- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ 
ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -1240,6 +1240,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index d9c781c..3c5854e 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -93,6 +93,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out index e9a0e45..01c7db6 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out @@ -114,6 +114,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out index fb7198d..fe96550 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out @@ -133,6 +133,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -307,6 +309,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -481,6 +485,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out index 3b7de64..65e2cff 100644 --- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out +++ 
ql/src/test/results/clientpositive/vectorized_parquet_types.q.out @@ -360,6 +360,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: _col0 (type: tinyint) diff --git ql/src/test/results/clientpositive/vectorized_timestamp.q.out ql/src/test/results/clientpositive/vectorized_timestamp.q.out index b0bfc8b..8633b04 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp.q.out @@ -135,6 +135,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -322,6 +324,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -429,6 +433,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS 
true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 244aca6..553847c 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -732,6 +732,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -839,6 +841,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -959,6 +963,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash 
diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index fbb89a9..fce013c 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -26,10 +26,13 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; +import org.apache.commons.lang.StringUtils; import org.apache.tools.ant.BuildException; import org.apache.tools.ant.Task; @@ -1162,6 +1165,32 @@ //template, , , {"VectorUDAFVarMerge", "VectorUDAFVarPartial2", "PARTIAL2"}, {"VectorUDAFVarMerge", "VectorUDAFVarFinal", "FINAL"}, + + {"GroupByHashSingleKeyOperatorBase", "VectorGroupByHash", "Long", "KeySingleCountOperatorBase", "SingleCount"}, + {"GroupByHashSingleKeyOperatorBase", "VectorGroupByHash", "String", "KeySingleCountOperatorBase", "SingleCount"}, + {"GroupByHashSingleKeyOperatorBase", "VectorGroupByHash", "Serialize", "KeySingleCountOperatorBase", "SingleCount"}, + + {"GroupByHashSingleKeySingleCountColumnOperator", "VectorGroupByHash", "Long", "KeySingleCountColumnOperator", "SingleCount"}, + {"GroupByHashSingleKeySingleCountColumnOperator", "VectorGroupByHash", "String", "KeySingleCountColumnOperator", "SingleCount"}, + {"GroupByHashSingleKeySingleCountColumnOperator", "VectorGroupByHash", "Serialize", "KeySingleCountColumnOperator", "SingleCount"}, + + {"GroupByHashSingleKeySingleCountKeyOperator", "VectorGroupByHash", "Long", "KeySingleCountKeyOperator", "SingleCount"}, + {"GroupByHashSingleKeySingleCountKeyOperator", "VectorGroupByHash", "String", "KeySingleCountKeyOperator", "SingleCount"}, + {"GroupByHashSingleKeySingleCountKeyOperator", "VectorGroupByHash", "Serialize", "KeySingleCountKeyOperator", "SingleCount"}, + + {"GroupByHashSingleKeySingleCountStarOperator", 
"VectorGroupByHash", "Long", "KeySingleCountStarOperator", "SingleCount"}, + {"GroupByHashSingleKeySingleCountStarOperator", "VectorGroupByHash", "String", "KeySingleCountStarOperator", "SingleCount"}, + {"GroupByHashSingleKeySingleCountStarOperator", "VectorGroupByHash", "Serialize", "KeySingleCountStarOperator", "SingleCount"}, + + + {"GroupByHashSingleKeyOperatorBase", "VectorGroupByHash", "Long", "KeyDuplicateReductionOperatorBase", "DuplicateReduction"}, + {"GroupByHashSingleKeyOperatorBase", "VectorGroupByHash", "String", "KeyDuplicateReductionOperatorBase", "DuplicateReduction"}, + {"GroupByHashSingleKeyOperatorBase", "VectorGroupByHash", "Serialize", "KeyDuplicateReductionOperatorBase", "DuplicateReduction"}, + + {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "Long", "KeyDuplicateReductionOperator", "DuplicateReduction"}, + {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "String", "KeyDuplicateReductionOperator", "DuplicateReduction"}, + {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "Serialize", "KeyDuplicateReductionOperator", "DuplicateReduction"}, + }; @@ -1174,6 +1203,11 @@ private String udafOutputDirectory; private String udafClassesDirectory; private String udafTemplateDirectory; + + private String groupByOperatorOutputDirectory; + private String groupByOperatorClassesDirectory; + private String groupByOperatorTemplateDirectory; + private GenVectorTestCode testCodeGen; static String joinPath(String...parts) { @@ -1210,6 +1244,16 @@ public void init(String templateBaseDir, String buildDir) { udafTemplateDirectory = joinPath(generationDirectory.getAbsolutePath(), "UDAFTemplates"); + String groupByOperator = joinPath("org", "apache", "hadoop", + "hive", "ql", "exec", "vector", "groupby", "operator", "gen"); + File groupByOperatorOutput = new File(joinPath(buildPath, groupByOperator)); + File groupByOperatorClasses = new File(joinPath(compiledPath, groupByOperator)); + 
groupByOperatorOutputDirectory = groupByOperatorOutput.getAbsolutePath(); + groupByOperatorClassesDirectory = groupByOperatorClasses.getAbsolutePath(); + + groupByOperatorTemplateDirectory = + joinPath(generationDirectory.getAbsolutePath(), "GroupByOperatorTemplates"); + File testCodeOutput = new File( joinPath(buildDir, "generated-test-sources", "java", "org", @@ -1433,6 +1477,13 @@ private void generate() throws Exception { } else if (tdesc[0].equals("TimestampArithmeticDate")) { generateTimestampArithmeticDate(tdesc); + } else if ( + tdesc[0].equals("GroupByHashSingleKeyOperatorBase") || + tdesc[0].equals("GroupByHashSingleKeySingleCountColumnOperator") || + tdesc[0].equals("GroupByHashSingleKeySingleCountKeyOperator") || + tdesc[0].equals("GroupByHashSingleKeySingleCountStarOperator") || + tdesc[0].equals("GroupByHashSingleKeyDuplicateReductionOperator")) { + generateGroupByOperator(tdesc); } else { continue; } @@ -3492,35 +3543,115 @@ private static boolean isTimestampIntervalType(String type) { || type.equals("interval_day_time")); } - private boolean containsDefinedStrings(Set defineSet, String commaDefinedString) { - String[] definedStrings = commaDefinedString.split(","); - boolean result = false; - for (String definedString : definedStrings) { - if (defineSet.contains(definedString)) { - result = true; - break; + private void generateGroupByOperator(String[] tdesc) throws Exception { + String templateName = tdesc[0]; + String prefix = tdesc[1]; + String singleKeyVariation = tdesc[2]; + String suffix = tdesc[3]; + String aggregationVariation = tdesc[4]; + + //Read the template into a string; + String className = prefix + singleKeyVariation + suffix; + File templateFile = + new File(joinPath(this.groupByOperatorTemplateDirectory, templateName + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("", className); + final String keyColumnVectorType; + if (singleKeyVariation.equals("Long")) { + 
keyColumnVectorType = "LongColumnVector"; + } else if (singleKeyVariation.equals("String")) { + keyColumnVectorType = "BytesColumnVector"; + } else { + keyColumnVectorType = "ColumnVector"; + } + templateString = templateString.replaceAll("", singleKeyVariation); + templateString = templateString.replaceAll("", singleKeyVariation.toLowerCase()); + templateString = templateString.replaceAll("", aggregationVariation); + templateString = templateString.replaceAll("", aggregationVariation.toLowerCase()); + templateString = templateString.replaceAll("", keyColumnVectorType); + + final String defineName = singleKeyVariation.toUpperCase() + "_KEY"; + templateString = evaluateIfDefined(templateString, defineName, + this.groupByOperatorTemplateDirectory); + + writeFile(templateFile.lastModified(), groupByOperatorOutputDirectory, groupByOperatorClassesDirectory, + className, templateString); + } + + private boolean matchesDefinedStrings(Set defineSet, Set newIfDefinedSet, + IfDefinedMode ifDefinedMode) { + switch (ifDefinedMode) { + case SINGLE: + case AND_ALL: + for (String candidateString : newIfDefinedSet) { + if (!defineSet.contains(candidateString)) { + return false; + } + } + return true; + case OR_ANY: + for (String candidateString : newIfDefinedSet) { + if (defineSet.contains(candidateString)) { + return true; + } + } + return false; + default: + throw new RuntimeException("Unexpected if defined mode " + ifDefinedMode); + } + } + + public enum IfDefinedMode { + SINGLE, + AND_ALL, + OR_ANY; + } + + private IfDefinedMode parseIfDefinedMode(String newIfDefinedString, Set newIfDefinedSet) { + final String[] newIfDefinedStrings; + final IfDefinedMode ifDefinedMode; + int index = newIfDefinedString.indexOf("&&"); + if (index != -1) { + newIfDefinedStrings = newIfDefinedString.split("&&"); + ifDefinedMode = IfDefinedMode.AND_ALL; + } else { + index = newIfDefinedString.indexOf("||"); + if (index == -1) { + + // One element. 
+ newIfDefinedSet.add(newIfDefinedString); + return IfDefinedMode.SINGLE; + } else { + newIfDefinedStrings = newIfDefinedString.split("\\|\\|"); + ifDefinedMode = IfDefinedMode.OR_ANY; } } - return result; + for (String newDefinedString : newIfDefinedStrings) { + newIfDefinedSet.add(newDefinedString); + } + return ifDefinedMode; } - private int doIfDefinedStatement(String[] lines, int index, Set definedSet, - boolean outerInclude, StringBuilder sb) { - String ifLine = lines[index]; + private int doIfDefinedStatement(List linesList, int index, Set definedSet, + boolean outerInclude, List ifDefinedEvaluatedLinesList) { + String ifLine = linesList.get(index); final int ifLineNumber = index + 1; - String commaDefinedString = ifLine.substring("#IF ".length()); - boolean includeBody = containsDefinedStrings(definedSet, commaDefinedString); + String newIfDefinedString = ifLine.substring("#IF ".length()); + Set newIfDefinedSet = new HashSet(); + IfDefinedMode ifDefinedMode = parseIfDefinedMode(newIfDefinedString, newIfDefinedSet); + boolean includeBody = matchesDefinedStrings(definedSet, newIfDefinedSet, ifDefinedMode); index++; - final int end = lines.length; + final int end = linesList.size(); while (true) { if (index >= end) { - throw new RuntimeException("Unmatched #IF at line " + index + " for " + commaDefinedString); + throw new RuntimeException("Unmatched #IF at line " + index + " for " + newIfDefinedString); } - String line = lines[index]; + String line = linesList.get(index); if (line.length() == 0 || line.charAt(0) != '#') { if (outerInclude && includeBody) { - sb.append(line); - sb.append("\n"); + ifDefinedEvaluatedLinesList.add(line); + // sb.append(line); + // sb.append("\n"); } index++; continue; @@ -3529,7 +3660,9 @@ private int doIfDefinedStatement(String[] lines, int index, Set definedS // A pound # statement (IF/ELSE/ENDIF). if (line.startsWith("#IF ")) { // Recurse. 
- index = doIfDefinedStatement(lines, index, definedSet, outerInclude && includeBody, sb); + index = + doIfDefinedStatement( + linesList, index, definedSet, outerInclude && includeBody, ifDefinedEvaluatedLinesList); } else if (line.equals("#ELSE")) { // Flip inclusion. includeBody = !includeBody; @@ -3538,10 +3671,10 @@ private int doIfDefinedStatement(String[] lines, int index, Set definedS throw new RuntimeException("Missing defined strings with #ENDIF on line " + (index + 1)); } else if (line.startsWith("#ENDIF ")) { String endCommaDefinedString = line.substring("#ENDIF ".length()); - if (!commaDefinedString.equals(endCommaDefinedString)) { + if (!newIfDefinedString.equals(endCommaDefinedString)) { throw new RuntimeException( "#ENDIF defined names \"" + endCommaDefinedString + "\" (line " + ifLineNumber + - " do not match \"" + commaDefinedString + "\" (line " + (index + 1) + ")"); + " do not match \"" + newIfDefinedString + "\" (line " + (index + 1) + ")"); } return ++index; } else { @@ -3550,44 +3683,213 @@ private int doIfDefinedStatement(String[] lines, int index, Set definedS } } - private void doEvaluateIfDefined(String[] lines, int index, Set definedSet, - boolean outerInclude, StringBuilder sb) { - final int end = lines.length; + private void doEvaluateIfDefined(List linesList, int index, Set definedSet, + boolean outerInclude, List ifDefinedEvaluatedLinesList) { + final int end = linesList.size(); while (true) { if (index >= end) { break; } - String line = lines[index]; + String line = linesList.get(index); if (line.length() == 0 || line.charAt(0) != '#') { if (outerInclude) { - sb.append(line); - sb.append("\n"); + ifDefinedEvaluatedLinesList.add(line); } index++; continue; } - // A pound # statement (IF/ELSE/ENDIF). if (line.startsWith("#IF ")) { - index = doIfDefinedStatement(lines, index, definedSet, outerInclude, sb); + + // A pound # statement (#IF #ELSE #ENDIF). 
+ index = + doIfDefinedStatement( + linesList, index, definedSet, outerInclude, ifDefinedEvaluatedLinesList); + } else if ( + !line.startsWith("#BEGIN_LINES ") && + !line.startsWith("#END_LINES") && + line.startsWith("#USE_LINES ") && + line.startsWith("#COMMENT")) { + throw new RuntimeException( + "Problem with #IF #ELSE #ENDIF on line " + (index + 1) + ": " + line); } else { - throw new RuntimeException("Problem with #IF/#ELSE/#ENDIF on line " + (index + 1) + ": " + line); + if (outerInclude) { + ifDefinedEvaluatedLinesList.add(line); + } + index++; } } } - private String evaluateIfDefined(String linesString, List definedList) { + private void doUseLinesCollectAndFilter(List linesList, Map> useLinesMap, + List filteredLinesList) { + + int index = 0; + final int size = linesList.size(); + while (true) { + + if (index >= size) { + return; + } + String line = linesList.get(index); + if (line.startsWith("#BEGIN_LINES ")) { + + final int beginLineIndex = index; + String linesTitle = line.substring("#BEGIN_LINES ".length()); + if (useLinesMap.containsKey(linesTitle)) { + throw new RuntimeException( + "Problem #BEGIN_LINES that started at " + beginLineIndex + + " -- duplicate name " + linesTitle); + } + while (true) { + if (index >= size) { + throw new RuntimeException( + "Problem #BEGIN_LINES that started at " + beginLineIndex + + " -- no matching #END_LINES found"); + } + line = linesList.get(index); + if (line.startsWith("#END_LINES")) { + useLinesMap.put(linesTitle, linesList.subList(beginLineIndex + 1, index)); + break; + } + index++; + } + } else if (line.startsWith("#COMMENT")) { + // Filter out comment lines. 
+ } else { + filteredLinesList.add(line); + } + index++; + } + } + + private void doUseLinesApply(List linesList, Map> useLinesMap, + List resultLinesList) { + + int index = 0; + final int size = linesList.size(); + while (true) { + + if (index >= size) { + return; + } + String line = linesList.get(index); + if (line.startsWith("#USE_LINES ")) { + + String linesTitle = line.substring("#USE_LINES ".length()); + final int blankCharIndex = linesTitle.indexOf(" "); + int pad = 0; + if (blankCharIndex != -1) { + String remainder = linesTitle.substring(blankCharIndex + 1); + linesTitle = linesTitle.substring(0, blankCharIndex); + if (!remainder.startsWith("+")) { + throw new RuntimeException( + "Problem #USE_LINES that started at " + index + + " -- expecting + sign for indent"); + } + String padString = remainder.substring(1); + pad = Integer.valueOf(padString); + } + List useLines = useLinesMap.get(linesTitle); + if (useLines == null) { + throw new RuntimeException( + "Problem #USE_LINES that started at " + index + + " -- name " + linesTitle + " not found"); + } + if (pad == 0) { + resultLinesList.addAll(useLines); + } else { + String padoutString = StringUtils.leftPad("", pad); + for (String useLine : useLines) { + resultLinesList.add(padoutString + useLine); + } + } + } else { + resultLinesList.add(line); + } + index++; + } + } + + private void doIncludeProcessing(String[] lines, String templateDirectory, + List resultList) throws IOException { + + // Just one level. 
+ int index = 0; + final int size = lines.length; + while (true) { + + if (index >= size) { + return; + } + String line = lines[index]; + if (line.startsWith("#INCLUDE ")) { + String includeFileName = line.substring("#INCLUDE ".length()); + File includeFile = + new File(joinPath(templateDirectory, includeFileName + ".txt")); + String includeString = readFile(includeFile); + String[] includeLines = includeString.split("\n"); + List includeLinesList = Arrays.asList(includeLines); + resultList.addAll(includeLinesList); + } else { + resultList.add(line); + } + index++; + } + } + + private String evaluateIfDefined(String linesString, List definedList, + String templateDirectory) throws IOException { String[] lines = linesString.split("\n"); Set definedSet = new HashSet(definedList); + List ifDefinedEvaluatedLinesList = new ArrayList(); + + List includedLinesList; + if (templateDirectory == null) { + includedLinesList = Arrays.asList(lines); + } else { + includedLinesList = new ArrayList(); + doIncludeProcessing(lines, templateDirectory, includedLinesList); + } + + doEvaluateIfDefined(includedLinesList, 0, definedSet, true, ifDefinedEvaluatedLinesList); + + Map> useLinesMap = new HashMap>(); + List filteredLinesList = new ArrayList(); + doUseLinesCollectAndFilter(ifDefinedEvaluatedLinesList, useLinesMap, filteredLinesList); + + List resultLinesList; + if (useLinesMap.isEmpty()) { + resultLinesList = filteredLinesList; + } else { + resultLinesList = new ArrayList(); + doUseLinesApply(filteredLinesList, useLinesMap, resultLinesList); + } + StringBuilder sb = new StringBuilder(); - doEvaluateIfDefined(lines, 0, definedSet, true, sb); + for (String line : resultLinesList) { + sb.append(line); + sb.append("\n"); + } return sb.toString(); } - private String evaluateIfDefined(String linesString, String definedString) { + private String evaluateIfDefined(String linesString, List definedList) + throws IOException { + return evaluateIfDefined(linesString, definedList, null); + } + 
+ private String evaluateIfDefined(String linesString, String definedString) + throws IOException{ return evaluateIfDefined(linesString, Arrays.asList(definedString.split(","))); } + private String evaluateIfDefined(String linesString, String definedString, + String templateDirectory) throws IOException { + return evaluateIfDefined(linesString, Arrays.asList(definedString.split(",")), + templateDirectory); + } + static void writeFile(long templateTime, String outputDir, String classesDir, String className, String str) throws IOException { File outputFile = new File(outputDir, className + ".java");