diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 40ea3ac..1fd56c3 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1679,7 +1679,15 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "joins unnecessary memory will be allocated and then trimmed."), HIVEHYBRIDGRACEHASHJOINBLOOMFILTER("hive.mapjoin.hybridgrace.bloomfilter", true, "Whether to " + "use BloomFilter in Hybrid grace hash join to minimize unnecessary spilling."), - + HIVEMAPJOINFULLOUTER("hive.mapjoin.full.outer", true, + "Whether to use MapJoin for FULL OUTER JOINs."), + HIVE_TEST_MAPJOINFULLOUTER_OVERRIDE( + "hive.test.mapjoin.full.outer.override", + "none", new StringSet("none", "enable", "disable"), + "internal use only, used to override the hive.mapjoin.full.outer\n" + + "setting. Using enable will force it on and disable will force it off.\n" + + "The default none does nothing.", + true), HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000, "How many rows with the same key value should be cached in memory per smb joined table."), HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000, diff --git data/files/fullouter_long_big_1a.txt data/files/fullouter_long_big_1a.txt new file mode 100644 index 0000000..8cf831f --- /dev/null +++ data/files/fullouter_long_big_1a.txt @@ -0,0 +1,11 @@ +-5310365297525168078 +-6187919478609154811 +968819023021777205 +3313583664488247651 +-5206670856103795573 +\N +-6187919478609154811 +1569543799237464101 +-6187919478609154811 +-8460550397108077433 +-6187919478609154811 diff --git data/files/fullouter_long_big_1a_nonull.txt data/files/fullouter_long_big_1a_nonull.txt new file mode 100644 index 0000000..b2325ad --- /dev/null +++ data/files/fullouter_long_big_1a_nonull.txt @@ -0,0 +1,10 @@ +1569543799237464101 +-6187919478609154811 +968819023021777205 +-8460550397108077433 +-6187919478609154811 +-5310365297525168078 +-6187919478609154811 +-5206670856103795573 +3313583664488247651 +-6187919478609154811 diff --git data/files/fullouter_long_big_1b.txt data/files/fullouter_long_big_1b.txt new file mode 100644 index 0000000..87c2b3c --- /dev/null +++ data/files/fullouter_long_big_1b.txt @@ -0,0 +1,13 @@ +\N +31713 +31713 +31713 +31713 +32030 +31713 +-25394 +31713 +31713 +31713 +31713 +31713 diff --git data/files/fullouter_long_big_1c.txt data/files/fullouter_long_big_1c.txt new file mode 100644 index 0000000..2d13c26 --- /dev/null +++ data/files/fullouter_long_big_1c.txt @@ -0,0 +1,11 @@ +1928928239,\N +-1437463633,YYXPPCH +-1437463633,TKTKGVGFW +1725068083,MKSCCE +1928928239,\N +\N,ABBZ +1928928239,AMKTIWQ +-1437463633,JU +1928928239,VAQHVRI +-1437463633,SOWDWMS +-1437463633,\N diff --git data/files/fullouter_long_big_1d.txt data/files/fullouter_long_big_1d.txt new file mode 100644 index 0000000..4137f67 --- /dev/null +++ data/files/fullouter_long_big_1d.txt @@ -0,0 +1,12 @@ +-702028721 +-702028721 +-1780951928 +-670834064 +-814597051 +\N +-814597051 +-814597051 +-702028721 +-2038654700 +\N +-814597051 diff --git data/files/fullouter_long_small_1a.txt data/files/fullouter_long_small_1a.txt new file mode 100644 index 0000000..45d5825 --- /dev/null +++ data/files/fullouter_long_small_1a.txt @@ -0,0 +1,54 @@ +-1339636982994067311,2000-06-20 +-2575185053386712613,2105-01-21 +\N,2098-02-10 +-6784441713807772877,1845-02-16 +\N,2024-01-23 +-4224290881682877258,2185-07-08
+-614848861623872247,2101-05-25 +-2098090254092150988,2163-05-26 +434940853096155515,2275-02-08 +3873405809071478736,2034-06-09 +-2184423060953067642,1880-10-06 +7297177530102477725,1921-05-11 +7937120928560087303,2083-03-14 +\N,2242-02-08 +-2688622006344936758,2129-01-11 +214451696109242839,1977-01-04 +-4961171400048338491,2196-08-10 +4436884039838843341,2031-05-23 +2438535236662373438,1916-01-10 +6049335087268933751,2282-06-09 +8755921538765428593,1827-05-01 +5252407779338300447,2039-03-10 +-2184423060953067642,1853-07-06 +7297177530102477725,1926-04-12 +-2098090254092150988,1817-03-12 +-5754527700632192146,1958-07-15 +-614848861623872247,2112-11-09 +5246983111579595707,1817-07-01 +-2098090254092150988,2219-12-23 +-5706981533666803767,2151-06-09 +7297177530102477725,2125-08-26 +-7707546703881534780,2134-08-20 +214451696109242839,2179-04-18 +3845554233155411208,1805-11-10 +3905351789241845882,2045-12-05 +2438535236662373438,2026-06-23 +-2688622006344936758,1948-10-15 +6049335087268933751,2086-12-17 +-2575185053386712613,1809-07-12 +-327698348664467755,2222-10-15 +-4224290881682877258,1813-05-17 +3873405809071478736,2164-04-23 +-5706981533666803767,1800-09-20 +214451696109242839,1855-05-12 +2438535236662373438,1881-09-16 +5252407779338300447,2042-04-26 +-3655445881497026796,2108-08-16 +3905351789241845882,1866-07-28 +-6784441713807772877,2054-06-17 +5246983111579595707,2260-05-11 +-1339636982994067311,2008-12-03 +3873405809071478736,1918-11-20 +-4224290881682877258,2120-01-16 +3845554233155411208,2264-04-05 diff --git data/files/fullouter_long_small_1a_nonull.txt data/files/fullouter_long_small_1a_nonull.txt new file mode 100644 index 0000000..bf94d5a --- /dev/null +++ data/files/fullouter_long_small_1a_nonull.txt @@ -0,0 +1,51 @@ +5246983111579595707,1817-07-01 +4436884039838843341,2031-05-23 +-4224290881682877258,1813-05-17 +-4961171400048338491,2196-08-10 +-2575185053386712613,2105-01-21 +5252407779338300447,2042-04-26 +-614848861623872247,2101-05-25 +-2098090254092150988,2163-05-26 +2438535236662373438,1881-09-16 +214451696109242839,2179-04-18 +2438535236662373438,2026-06-23 +-2184423060953067642,1853-07-06 +3873405809071478736,2164-04-23 +214451696109242839,1855-05-12 +-6784441713807772877,1845-02-16 +-2688622006344936758,1948-10-15 +7297177530102477725,1921-05-11 +-2575185053386712613,1809-07-12 +3905351789241845882,2045-12-05 +3845554233155411208,1805-11-10 +-3655445881497026796,2108-08-16 +3905351789241845882,1866-07-28 +-1339636982994067311,2008-12-03 +7297177530102477725,2125-08-26 +7297177530102477725,1926-04-12 +-5706981533666803767,1800-09-20 +6049335087268933751,2282-06-09 +3845554233155411208,2264-04-05 +8755921538765428593,1827-05-01 +-1339636982994067311,2000-06-20 +-2098090254092150988,1817-03-12 +3873405809071478736,2034-06-09 +2438535236662373438,1916-01-10 +5246983111579595707,2260-05-11 +-5706981533666803767,2151-06-09 +-614848861623872247,2112-11-09 +-327698348664467755,2222-10-15 +-2184423060953067642,1880-10-06 +434940853096155515,2275-02-08 +-4224290881682877258,2120-01-16 +-5754527700632192146,1958-07-15 +-4224290881682877258,2185-07-08 +-2098090254092150988,2219-12-23 +-7707546703881534780,2134-08-20 +214451696109242839,1977-01-04 +-2688622006344936758,2129-01-11 +7937120928560087303,2083-03-14 +-6784441713807772877,2054-06-17 +3873405809071478736,1918-11-20 +6049335087268933751,2086-12-17 +5252407779338300447,2039-03-10 diff --git data/files/fullouter_long_small_1b.txt data/files/fullouter_long_small_1b.txt new file mode 100644 index 0000000..7d45fe4 --- 
/dev/null +++ data/files/fullouter_long_small_1b.txt @@ -0,0 +1,72 @@ +2748,2298-06-20 21:01:24 +11232,2533-11-26 12:22:18 +\N,2124-05-07 15:01:19.021 +3198,2428-06-13 16:21:33.955 +-7624,2219-12-03 17:07:19 +24870,2752-12-26 12:32:23.03685163 +14865,2943-03-21 00:42:10.505 +-8624,2644-05-04 04:45:07.839 +-30059,2269-05-04 21:23:44.000339209 +14865,2079-10-06 16:54:35.117 +-8435,2834-12-06 16:38:18.901 +10553,2168-05-05 21:10:59.000152113 +-8624,2282-03-28 07:58:16 +-15361,2219-09-15 20:15:03.000169887 +-14172,1918-09-13 11:44:24.496926711 +26484,1919-03-04 07:32:37.519 +-14172,2355-01-14 23:23:34 +-24775,2920-08-06 15:58:28.261059449 +-23117,2037-01-05 21:52:30.685952759 +17125,2236-07-14 01:54:40.927230276 +21181,2253-03-12 11:55:48.332 +-7373,2662-10-28 12:07:02.000526564 +-8087,2550-06-26 23:57:42.588007617 +29407,2385-12-14 06:03:39.597 +21181,2434-02-20 00:46:29.633 +-14172,2809-06-07 02:10:58 +13598,2421-05-20 14:18:31.000264698 +2748,2759-02-13 18:04:36.000307355 +-22422,1949-03-13 00:07:53.075 +26484,2953-03-10 02:05:26.508953676 +4510,2777-03-24 03:44:28.000169723 +-24775,2035-03-26 08:11:23.375224153 +-30059,2713-10-13 09:28:49 +-20517,2774-06-23 12:04:06.5 +11232,2038-04-06 14:53:59 +32030,2101-09-09 07:35:05.145 +-29600,2333-11-02 15:06:30 +-30306,2619-05-24 10:35:58.000774018 +-7624,2289-08-28 00:14:34 +-4279,2470-08-12 11:21:14.000955747 +-4279,2214-09-10 03:53:06 +-26998,2428-12-26 07:53:45.96925825 +17125,2629-11-15 15:34:52 +-8087,2923-07-02 11:40:26.115 +2632,2561-12-15 15:42:27 +21436,2696-05-08 05:19:24.112 +\N,2971-08-07 12:02:11.000948152 +-7624,2623-03-20 03:18:45.00006465 +-26998,2926-07-18 09:02:46.077 +11232,2507-01-27 22:04:22.49661421 +-30059,2420-12-10 22:12:30 +-15427,2355-01-08 12:34:11.617 +3198,2223-04-14 13:20:49 +-19167,2319-08-26 11:07:11.268 +14865,2220-02-28 03:41:36 +-20517,2233-12-20 04:06:56.666522799 +-15427,2046-06-07 22:58:40.728 +2748,2862-04-20 13:12:39.482805897 +-8435,2642-02-07 11:45:04.353231638 +-19167,2230-12-22 20:25:39.000242111 +-15427,2023-11-09 19:31:21 +13598,2909-06-25 23:22:50 +21436,2526-09-22 23:44:55 +-15361,2434-08-13 20:37:07.000172979 +4510,2293-01-17 13:47:41.00001006 +-8624,2120-02-15 15:36:40.000758423 +-22422,2337-07-19 06:33:02.000353352 +-26998,2268-08-04 12:48:11.848006292 +-22422,2982-12-28 06:30:26.000883228 +\N,2933-06-20 11:48:09.000839488 +3198,2736-12-20 03:59:50.343550301 +-20824,2478-11-05 00:28:05 diff --git data/files/fullouter_long_small_1c.txt data/files/fullouter_long_small_1c.txt new file mode 100644 index 0000000..ff323d3 --- /dev/null +++ data/files/fullouter_long_small_1c.txt @@ -0,0 +1,81 @@ +-1093006502,-69.55665828 +452719211,83003.43722 +1242586043,71.1485 +-934092157,-7843850349.57130038 +294598722,-3542.6 +284554389,5.727146 +90660785,12590.288613 +-99948814,-38076694.3981 +466567142,-9763217822.129028 +1909136587,-8610.078036935181 +1242586043,-4 +\N,1.089120893565337 +1039864870,987601.57 +-466171792,0 +-1681455031,-6.4543 +1755897735,-39.965207 +1585021913,745222.66808954 +448130683,-4302.485366846491 +193709887,0.8 +-424713789,0.48 +1585021913,607.22747 +-1250662632,5454127198.951479 +294598722,-9377326244.444 +193709887,-19889.83 +1039864870,0.7 +1242586043,-749975924224.63 +-1250662632,-544.554649 +-1740848088,-9.157 +-369457052,7.7 +-369457052,560.11907883090455 +90660785,-4564.517185 +466567142,-58810.60586 +466567142,196.5785295398584 +1738753776,1525.280459649262 +1816559437,-1035.7009 +-1490239076,92253.232096 +1039864870,94.04 +560745412,678.25 +-466171792,4227.5344 
+1561921421,53050.55 +-99948814,-96386.438 +1519948464,152 +1719049112,-7888197 +-793950320,-16 +-466171792,69.9 +1738753776,-99817635066320.2416 +1091836730,0.02 +891262439,-0.04 +452719211,3020.2938930744636 +-2048404259,3939387044.1 +698032489,-330457.4292625839 +-1197550983,-0.5588796922 +-2123273881,-55.89198 +-2048404259,-0.3222960446251 +1585021913,-5762331.06697112 +1785750809,47443.115 +1909136587,181.07681535944 +1801735854,-1760956929364.267 +\N,4.26165227 +1801735854,-438541294.7 +150678276,-8278 +1479580778,92077343080.7 +1091836730,-5017.14 +193709887,-0.5663 +-1681455031,-11105.372477 +-1250662632,93104 +-1197550983,0.1 +\N,682070836.2649603 +-1197550983,71852.8338674412613 +1561921421,-5.405 +-1740848088,0.506394259 +150678276,15989394.8436 +-793950320,-0.1 +-1740848088,901.441 +-477147437,6 +-1264372462,0.883 +-2123273881,3.959 +-1264372462,-6993985240226 +-1264372462,-899 +-243940373,-97176129669.654953 +-243940373,-583.258 diff --git data/files/fullouter_long_small_1d.txt data/files/fullouter_long_small_1d.txt new file mode 100644 index 0000000..9778d3f --- /dev/null +++ data/files/fullouter_long_small_1d.txt @@ -0,0 +1,39 @@ +533298451 +1164387380 +1614287784 +1635405412 +-1912571616 +-894799664 +-1210744742 +-1014271154 +-747044796 +-1003639073 +436878811 +-1323620496 +-1379355738 +-1712018127 +246169862 +1431997749 +670834064 +1780951928 +-707688773 +1997943409 +1372592319 +-932176731 +162858059 +-683339273 +-497171161 +699863556 +1685473722 +41376947 +-1036083124 +1825107160 +-2038654700 +2119085509 +260588085 +-1792852276 +1831520491 +103640700 +\N +699007128 +1840266070 diff --git data/files/fullouter_multikey_big_1a.txt data/files/fullouter_multikey_big_1a.txt new file mode 100644 index 0000000..fe38c7b --- /dev/null +++ data/files/fullouter_multikey_big_1a.txt @@ -0,0 +1,13 @@ +22767,-1969080993 +-17582,-1730236061 +3556,\N +-17582,1082230084 +-17582,827141667 +1499,371855128 +-17582,9637312 +\N,1082230084 +-6131,-1969080993 +3556,-1969080993 +\N,\N +-18222,-1969080993 +-17582,267529350 diff --git data/files/fullouter_multikey_big_1a_nonull.txt data/files/fullouter_multikey_big_1a_nonull.txt new file mode 100644 index 0000000..40e84b0 --- /dev/null +++ data/files/fullouter_multikey_big_1a_nonull.txt @@ -0,0 +1,10 @@ +-17582,1082230084 +22767,-1969080993 +-17582,827141667 +-17582,-1730236061 +3556,-1969080993 +-6131,-1969080993 +-18222,-1969080993 +1499,371855128 +-17582,267529350 +-17582,9637312 diff --git data/files/fullouter_multikey_big_1b.txt data/files/fullouter_multikey_big_1b.txt new file mode 100644 index 0000000..40cfb9a --- /dev/null +++ data/files/fullouter_multikey_big_1b.txt @@ -0,0 +1,17 @@ +2061-12-19 22:10:32.000628309,21635,ANCO +\N,21635,ANCO +2686-05-23 07:46:46.565832918,13212,NCYBDW +2082-07-14 04:00:40.695380469,12556,NCYBDW +2188-06-04 15:03:14.963259704,9468,AAA +2608-02-23 23:44:02.546440891,26184,NCYBDW +2093-04-10 23:36:54.846,\N,\N +2898-10-01 22:27:02.000871113,10361,NCYBDW +2306-06-21 11:02:00.143124239,1446,\N +\N,-6909,\N +\N,\N,\N +2306-06-21 11:02:00.143124239,-6909,NCYBDW +2093-04-10 23:36:54.846,1446,GHZVPWFO +\N,\N,CCWYD +2686-05-23 07:46:46.565832918,\N,GHZVPWFO +2093-04-10 23:36:54.846,28996,Q +2299-11-15 16:41:30.401,-31077,NCYBDW diff --git data/files/fullouter_multikey_small_1a.txt data/files/fullouter_multikey_small_1a.txt new file mode 100644 index 0000000..4e0742c --- /dev/null +++ data/files/fullouter_multikey_small_1a.txt @@ -0,0 +1,92 @@ +23015,258882280 +23015,-276888585 +21186,-586336015 +-22311,-2055239583 
+3412,-1249487623 +\N,1082230084 +20156,-1618478138 +-17788,-738743861 +-24206,-1456409156 +30353,2044473567 +20969,-1995259010 +-23457,-63842445 +3412,-2081156563 +-6131,-1969080993 +23015,-252525791 +30353,1364268303 +23015,564751472 +15404,1078466156 +4586,-586336015 +-4117,-1386947816 +-26894,-63842445 +-17788,-1361776766 +-7386,-2112062470 +23015,-1893013623 +30353,1241923267 +-24206,641361618 +-28129,-2055239583 +-20125,-1995259010 +16166,931172175 +31443,-1968665833 +-28313,837320573 +11460,1078466156 +15061,-63842445 +13672,-63842445 +14400,-825652334 +-7386,100736776 +26944,-1995259010 +-11868,97203778 +12089,-63842445 +-28137,-63842445 +3412,1253976194 +-980,2009785365 +16696,-63842445 +-11868,930596435 +4902,1078466156 +-17582,267529350 +-12252,964377504 +20156,963883665 +-11868,1658440922 +4779,-1995259010 +-7386,-1635102480 +-28313,51228026 +-11868,1052120431 +-980,-270600267 +-20900,1078466156 +\N,\N +20156,1165375499 +30353,-1507157031 +3412,-1196037018 +22934,-1695419330 +30353,105613996 +-17788,-872691214 +-980,-333603940 +30353,-1011627089 +-11868,-3536499 +-2407,1078466156 +23015,-217613200 +-28313,-706104224 +-980,712692345 +-11868,1456809245 +-17788,528419995 +-11868,-915441041 +-980,628784462 +30353,-1007182618 +23015,-696928205 +-980,356970043 +23015,-893234501 +-980,-465544127 +-5734,1078466156 +-980,-801821285 +26738,-2055239583 +8177,-1995259010 +-11868,1318114822 +3890,1411429004 +-6061,-586336015 +3412,-2132472060 +-15212,-2055239583 +-12252,1956403781 +5957,-1995259010 +-1787,-63842445 +20156,1855042153 +-980,1310479628 diff --git data/files/fullouter_multikey_small_1a_nonull.txt data/files/fullouter_multikey_small_1a_nonull.txt new file mode 100644 index 0000000..2a8b9a1 --- /dev/null +++ data/files/fullouter_multikey_small_1a_nonull.txt @@ -0,0 +1,90 @@ +16696,-63842445 +4586,-586336015 +26738,-2055239583 +-17788,-738743861 +-28313,-706104224 +-23457,-63842445 +-20900,1078466156 +-12252,964377504 +-28313,51228026 +-11868,-3536499 +11460,1078466156 +26944,-1995259010 +20156,1855042153 +-11868,97203778 +15061,-63842445 +-17788,528419995 +-26894,-63842445 +-28313,837320573 +20156,963883665 +-15212,-2055239583 +5957,-1995259010 +30353,-1011627089 +3890,1411429004 +-980,-333603940 +13672,-63842445 +-980,628784462 +23015,-252525791 +-11868,1052120431 +-980,356970043 +23015,-217613200 +-6061,-586336015 +-5734,1078466156 +-11868,1318114822 +23015,258882280 +-2407,1078466156 +12089,-63842445 +3412,-2132472060 +-28129,-2055239583 +-980,-270600267 +16166,931172175 +-7386,100736776 +4902,1078466156 +20969,-1995259010 +22934,-1695419330 +3412,-1249487623 +3412,1253976194 +21186,-586336015 +8177,-1995259010 +-7386,-1635102480 +-11868,1456809245 +-20125,-1995259010 +-980,-801821285 +-980,1310479628 +23015,564751472 +23015,-893234501 +4779,-1995259010 +-980,2009785365 +-24206,641361618 +30353,-1507157031 +14400,-825652334 +3412,-2081156563 +20156,-1618478138 +31443,-1968665833 +-22311,-2055239583 +30353,1241923267 +-11868,930596435 +-17788,-1361776766 +-24206,-1456409156 +-7386,-2112062470 +30353,1364268303 +23015,-1893013623 +-17788,-872691214 +30353,2044473567 +-28137,-63842445 +30353,105613996 +-6131,-1969080993 +-17582,267529350 +23015,-276888585 +-12252,1956403781 +23015,-696928205 +-11868,1658440922 +-1787,-63842445 +-11868,-915441041 +-980,-465544127 +30353,-1007182618 +-980,712692345 +20156,1165375499 +3412,-1196037018 +15404,1078466156 +-4117,-1386947816 diff --git data/files/fullouter_multikey_small_1b.txt data/files/fullouter_multikey_small_1b.txt new file mode 
100644 index 0000000..b56a3f7 --- /dev/null +++ data/files/fullouter_multikey_small_1b.txt @@ -0,0 +1,118 @@ +2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +\N,\N,\N,-2207.3 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 +2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +\N,\N,\N,-2.4 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 +2512-10-06 03:03:03,1560,X,-922.6951584107 +2396-04-06 15:39:02.404013577,29661,ZSMB,0.76718326 +2409-09-23 10:33:27,2638,XSXR,0.4 +2969-01-23 14:08:04.000667259,6689,TFGVOGPJF,-0.01 +2333-07-28 09:59:26,23196,RKSK,37872288434740893.5 +2409-09-23 10:33:27,2638,XSXR,-162.95 +2357-05-08 07:09:09.000482799,6226,ZSMB,-472 +2304-12-15 15:31:16,15090,G,-4319470286240016.3 +2304-12-15 15:31:16,1301,T,61.302 +2105-01-04 16:27:45,23100,ZSMB,-83.2328 +2242-08-04 07:51:46.905,20223,UCYXACQ,-0.26149 +2637-03-12 22:25:46.385,-17786,HYEGQ,-84.169614329419 +1931-12-04 11:13:47.269597392,23196,HVJCQMTQL,-9697532.8994 +2897-08-10 15:21:47.09,23663,XYUVBED,6370 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,57.62175257788037 +2145-10-15 06:58:42.831,2638,UANGISEXR,-5996.306 +2462-12-16 23:11:32.633305644,-26373,CB,67.41799 +2396-04-06 15:39:02.404013577,29661,ZSMB,-5151598.347 +2304-12-15 15:31:16,15090,G,975 +2512-10-06 03:03:03,32099,ARNZ,-0.41 +2188-06-04 15:03:14.963259704,9468,AAA,2.75496352 +2512-10-06 03:03:03,1560,X,761196.522 +2304-12-15 15:31:16,1301,T,2720.8 +1919-06-20 00:16:50.611028595,20223,ZKBC,-23 +2897-08-10 15:21:47.09,23663,XYUVBED,51.7323303273 +2086-04-09 
00:03:10,20223,THXNJGFFV,-85184687349898.892 +2238-05-17 19:27:25.519,20223,KQCM,-0.01095 +2086-04-09 00:03:10,20223,THXNJGFFV,482.5383411359219 +2480-10-02 09:31:37.000770961,-26373,NBN,-5875.5197252 +2086-04-09 00:03:10,20223,THXNJGFFV,0.4396861 +2759-11-26 22:19:55.410967136,-27454,ZMY,60.6025797 +2083-06-07 09:35:19.383,-26373,MR,67892053.02376094 +2882-05-20 07:21:25.221299462,23196,U,-9951044 +2971-02-14 09:13:19,-16605,BVACIRP,-27394351.3 +2512-10-06 03:03:03,24313,QBHUG,-8423.151573236 +2882-05-20 07:21:25.221299462,23196,U,-4244.926206619 +1905-04-20 13:42:25.000469776,2638,KAUUFF,7 +2410-05-03 13:44:56,2638,PHOR,-769088.176482 +2668-06-25 07:12:37.000970744,2638,TJE,-2.7796827 +2969-01-23 14:08:04.000667259,-32485,AGEPWWLJF,-48431309405.652522 +2410-05-03 13:44:56,2638,PHOR,93262.914526611 +2512-10-06 03:03:03,13195,CRJ,14 +2018-11-25 22:27:55.84,-12202,VBDBM,98790.713907420831 +2304-12-15 15:31:16,8650,RLNO,-0.4355 +2071-07-21 20:02:32.000250697,2638,NRUV,-66198.351092 +2525-05-12 15:59:35,-24459,SAVRGA,53106747151.8633 +2637-03-12 22:25:46.385,21841,CXTI,749563668434009.65 +2018-11-25 22:27:55.84,-22419,LOTLS,342.3726040228584 +2637-03-12 22:25:46.385,21841,CXTI,7362887891522.3782 +2038-10-12 09:15:33.000539653,-19598,YKNIAJW,-642807895924.66 +2957-05-07 10:41:46,20223,OWQT,-586953.153681 +2304-12-15 15:31:16,11101,YJCKKCR,1279917802.42 +2355-09-23 19:52:34.638084141,-19598,H,92.15 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,2.1577659 +2355-09-23 19:52:34.638084141,-19598,H,74179461.880493 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-81 +\N,-12914,ZVEUKC,221 +2743-12-27 05:16:19.000573579,-12914,ZVEUKC,-811984611.5178497 +1957-02-01 14:00:29.000548421,-16085,ZVEUKC,-2312.8149 +2201-07-05 17:22:06.084206844,-24459,UBGT,1.5069483282 +2461-03-09 09:54:45.000982385,-16454,ZSMB,8694.89 +2169-04-02 06:30:32,23855,PDVQATOS,-1515597428 +2304-12-15 15:31:16,30285,GSJPSIYOU,0.2 +2913-07-17 15:06:58.041,-10206,\N,-0.2 +2169-04-02 06:30:32,23855,PDVQATOS,-4016.9608 +2759-11-26 22:19:55.410967136,-27454,ZMY,368 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,726945733.4193 +2304-12-15 15:31:16,11101,YJCKKCR,-0.5 +2462-12-16 23:11:32.633305644,-26373,CB,-582687 +2357-05-08 07:09:09.000482799,6226,ZSMB,-32.46 +2304-12-15 15:31:16,12587,OPW,-4.59489504 diff --git data/files/fullouter_string_big_1a.txt data/files/fullouter_string_big_1a.txt new file mode 100644 index 0000000..1cbcd05 --- /dev/null +++ data/files/fullouter_string_big_1a.txt @@ -0,0 +1,13 @@ +FTWURVH +QNCYBDW +UA +WXHJ +\N +WXHJ +PXLD +WXHJ +PXLD +WXHJ +WXHJ +MXGDMBD +PXLD diff --git data/files/fullouter_string_big_1a_nonull.txt data/files/fullouter_string_big_1a_nonull.txt new file mode 100644 index 0000000..a6566f2 --- /dev/null +++ data/files/fullouter_string_big_1a_nonull.txt @@ -0,0 +1,12 @@ +WXHJ +WXHJ +FTWURVH +MXGDMBD +UA +WXHJ +QNCYBDW +PXLD +PXLD +WXHJ +PXLD +WXHJ diff --git data/files/fullouter_string_big_1a_old.txt data/files/fullouter_string_big_1a_old.txt new file mode 100644 index 0000000..1fa51ad --- /dev/null +++ data/files/fullouter_string_big_1a_old.txt @@ -0,0 +1,13 @@ +WXHJ +WXHJ +WXHJ +WXHJ +WXHJ +QNCYBDW +PXLD +PXLD +PXLD +UA +\N +FTWURVH +MXGDMBD diff --git data/files/fullouter_string_small_1a.txt data/files/fullouter_string_small_1a.txt new file mode 100644 index 0000000..f223da0 --- /dev/null +++ data/files/fullouter_string_small_1a.txt @@ -0,0 +1,38 @@ +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 
+MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +,1985-01-22,2111-01-10 15:44:28 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +\N,1865-11-08,2893-04-07 07:36:12 +BEP,2206-08-10,2331-10-09 10:59:51 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +IWEZJHKE,\N,\N +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +KL,1980-09-22,2073-08-25 11:51:10.318 +\N,1915-02-22,2554-10-27 09:34:30 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +GOYJHW,1959-04-27,\N +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +LOTLS,2126-09-16,1977-12-15 15:28:56 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +,2021-02-21,2802-04-21 18:48:18.5933838 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +SDA,2196-04-12,2462-10-26 19:28:12.733 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +FYW,1807-03-20,2305-08-17 01:32:44 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 diff --git data/files/fullouter_string_small_1a_nonull.txt data/files/fullouter_string_small_1a_nonull.txt new file mode 100644 index 0000000..6b97ef4 --- /dev/null +++ data/files/fullouter_string_small_1a_nonull.txt @@ -0,0 +1,35 @@ +LOTLS,2126-09-16,1977-12-15 15:28:56 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +FYW,1807-03-20,2305-08-17 01:32:44 +,2021-02-21,2802-04-21 18:48:18.5933838 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +BEP,2206-08-10,2331-10-09 10:59:51 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +,1985-01-22,2111-01-10 15:44:28 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +GOYJHW,1959-04-27,\N +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +KL,1980-09-22,2073-08-25 11:51:10.318 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +IWEZJHKE,\N,\N +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 diff --git data/files/fullouter_string_small_1a_old.txt data/files/fullouter_string_small_1a_old.txt new file mode 100644 index 0000000..505c403 --- /dev/null +++ data/files/fullouter_string_small_1a_old.txt @@ -0,0 +1,38 @@ +,2021-02-21,2802-04-21 18:48:18.5933838 +,1985-01-22,2111-01-10 15:44:28 
+VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +KL,1980-09-22,2073-08-25 11:51:10.318 +FYW,1807-03-20,2305-08-17 01:32:44 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BEP,2141-02-19,2521-06-09 01:20:07.121 +BEP,2206-08-10,2331-10-09 10:59:51 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +IWEZJHKE,\N,\N +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +GOYJHW,1959-04-27,\N +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +ZNOUDCR,\N,1988-04-23 08:40:21 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +\N,1865-11-08,2893-04-07 07:36:12 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +\N,1915-02-22,2554-10-27 09:34:30 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +LOTLS,2126-09-16,1977-12-15 15:28:56 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java index af446db..037f754 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java @@ -13,19 +13,24 @@ */ package org.apache.hive.benchmark.vectorization.mapjoin; +import java.util.ArrayList; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.tez.ObjectCache; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateUtil; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; @@ -74,30 +79,31 @@ public void bench() throws Exception { } 
protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, - VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, - String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, - String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, - int[] bigTableRetainColumnNums, - int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, - SmallTableGenerationParameters smallTableGenerationParameters) throws Exception { + VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, + String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, + int[] bigTableRetainColumnNums, + int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters) throws Exception { this.vectorMapJoinVariation = vectorMapJoinVariation; this.mapJoinImplementation = mapJoinImplementation; testDesc = new MapJoinTestDescription( hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, + bigTableTypeInfos, bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); // Prepare data. Good for ANY implementation variation. - testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + testData = new MapJoinTestData(rowCount, testDesc, seed); ObjectRegistryImpl objectRegistry = new ObjectRegistryImpl(); ObjectCache.setupObjectRegistry(objectRegistry); - + operator = setupBenchmarkImplementation( mapJoinImplementation, testDesc, testData); @@ -108,15 +114,21 @@ protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, */ if (!isVectorOutput) { - bigTableRows = VectorBatchGenerateUtil.generateRowObjectArray( - testDesc.bigTableKeyTypeInfos, testData.getBigTableBatchStream(), - testData.getBigTableBatch(), testDesc.outputObjectInspectors); + bigTableRows = testData.getBigTableBatchSource().getRandomRows(); } else { - bigTableBatches = VectorBatchGenerateUtil.generateBatchArray( - testData.getBigTableBatchStream(), testData.getBigTableBatch()); - + ArrayList<VectorizedRowBatch> bigTableBatchList = new ArrayList<VectorizedRowBatch>(); + VectorRandomBatchSource batchSource = testData.getBigTableBatchSource(); + batchSource.resetBatchIteration(); + while (true) { + VectorizedRowBatch batch = testData.createBigTableBatch(testDesc); + if (!batchSource.fillNextBatch(batch)) { + break; + } + bigTableBatchList.add(batch); + } + bigTableBatches = bigTableBatchList.toArray(new VectorizedRowBatch[0]); } } @@ -131,7 +143,6 @@ protected static MapJoinOperator setupBenchmarkImplementation( MapJoinTestData testData) throws Exception { - // UNDONE: Parameterize for implementation variation? MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc); final boolean isVectorOutput = isVectorOutput(mapJoinImplementation); @@ -141,9 +152,19 @@ protected static MapJoinOperator setupBenchmarkImplementation( (!isVectorOutput ?
new CountCollectorTestOperator() : new CountVectorCollectorTestOperator()); - MapJoinOperator operator = + CreateMapJoinResult createMapJoinResult = MapJoinTestConfig.createMapJoinImplementation( - mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + mapJoinImplementation, testDesc, testData, mapJoinDesc, + /* shareMapJoinTableContainer */ null); + MapJoinOperator operator = createMapJoinResult.mapJoinOperator; + MapJoinTableContainer mapJoinTableContainer = createMapJoinResult.mapJoinTableContainer; + + // Invoke initializeOp methods. + operator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables. + operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null); + return operator; } diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java index c9da92a..aa88297 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java @@ -59,7 +59,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java index a6b4719..60b2890 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java @@ -57,7 +57,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java index 1b31038..937ede1 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java @@ -57,7 +57,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, 
smallTableRetainValueColumnNums, diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 983d5a7..24a6860 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -510,6 +510,7 @@ minillaplocal.query.files=\ explainanalyze_2.q,\ explainuser_1.q,\ explainuser_4.q,\ + fullouter_mapjoin_1_optimized.q,\ groupby2.q,\ groupby_groupingset_bug.q,\ hybridgrace_hashjoin_1.q,\ @@ -756,6 +757,10 @@ minillaplocal.query.files=\ vector_create_struct_table.q,\ vector_decimal_2.q,\ vector_decimal_udf.q,\ + vector_full_outer_join.q,\ + vector_fullouter_mapjoin_1_fast.q,\ + vector_fullouter_mapjoin_1_optimized.q,\ + vector_fullouter_mapjoin_1_optimized_passthru.q,\ vector_groupby_cube1.q,\ vector_groupby_grouping_id1.q,\ vector_groupby_grouping_id2.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java index 02a67cb..3762ee5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java @@ -790,7 +790,16 @@ private boolean hasRightPairForLeft(int left, int right) { } private boolean hasAnyFiltered(int alias, List<Object> row) { - return row == dummyObj[alias] || hasFilter(alias) && JoinUtil.hasAnyFiltered(getFilterTag(row)); + if (row == dummyObj[alias]) { + return true; + } + if (hasFilter(alias) && row != null) { + ShortWritable shortWritable = (ShortWritable) row.get(row.size() - 1); + if (shortWritable != null) { + return JoinUtil.hasAnyFiltered(shortWritable.get()); + } + } + return false; + } protected final boolean hasFilter(int alias) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index da20d2e..4efb255 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -727,6 +727,73 @@ private JSONObject outputPlan(Object work, return outputPlan(work, out, extended, jsonOutput, indent, ""); } + private boolean isInvokeVectorization(Vectorization vectorization) { + + boolean invokeFlag = true; // Assume. + + // The EXPLAIN VECTORIZATION option was specified. + final boolean desireOnly = this.work.isVectorizationOnly(); + final VectorizationDetailLevel desiredVecDetailLevel = + this.work.isVectorizationDetailLevel(); + + switch (vectorization) { + case NON_VECTORIZED: + // Display all non-vectorized leaf objects unless ONLY. + if (desireOnly) { + invokeFlag = false; + } + break; + case SUMMARY: + case OPERATOR: + case EXPRESSION: + case DETAIL: + if (vectorization.rank < desiredVecDetailLevel.rank) { + // This detail not desired. + invokeFlag = false; + } + break; + case SUMMARY_PATH: + case OPERATOR_PATH: + if (desireOnly) { + if (vectorization.rank < desiredVecDetailLevel.rank) { + // Suppress headers and all objects below. + invokeFlag = false; + } + } + break; + default: + throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + } + + return invokeFlag; + } + + private boolean isInvokeNonVectorization(Vectorization vectorization) { + + boolean invokeFlag = true; // Assume. + + // Do not display vectorization objects. + switch (vectorization) { + case SUMMARY: + case OPERATOR: + case EXPRESSION: + case DETAIL: + invokeFlag = false; + break; + case NON_VECTORIZED: + // No action.
+ break; + case SUMMARY_PATH: + case OPERATOR_PATH: + // Always include headers since they contain non-vectorized objects, too. + break; + default: + throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + } + + return invokeFlag; + } + @VisibleForTesting JSONObject outputPlan(Object work, PrintStream out, boolean extended, boolean jsonOutput, int indent, String appendToHeader) throws Exception { @@ -751,65 +818,17 @@ public JSONObject outputPlan(Object work, PrintStream out, if (extended) { invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels()); } else { - invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()); + invokeFlag = + Level.DEFAULT.in(xpl_note.explainLevels()) || + (this.work != null && this.work.isDebug() && Level.DEBUG.in(xpl_note.explainLevels())); } } if (invokeFlag) { Vectorization vectorization = xpl_note.vectorization(); if (this.work != null && this.work.isVectorization()) { - - // The EXPLAIN VECTORIZATION option was specified. - final boolean desireOnly = this.work.isVectorizationOnly(); - final VectorizationDetailLevel desiredVecDetailLevel = - this.work.isVectorizationDetailLevel(); - - switch (vectorization) { - case NON_VECTORIZED: - // Display all non-vectorized leaf objects unless ONLY. - if (desireOnly) { - invokeFlag = false; - } - break; - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - if (vectorization.rank < desiredVecDetailLevel.rank) { - // This detail not desired. - invokeFlag = false; - } - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - if (desireOnly) { - if (vectorization.rank < desiredVecDetailLevel.rank) { - // Suppress headers and all objects below. - invokeFlag = false; - } - } - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); - } + invokeFlag = isInvokeVectorization(vectorization); } else { - // Do not display vectorization objects. - switch (vectorization) { - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - invokeFlag = false; - break; - case NON_VECTORIZED: - // No action. - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - // Always include headers since they contain non-vectorized objects, too. - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); - } + invokeFlag = isInvokeNonVectorization(vectorization); } } if (invokeFlag) { @@ -887,64 +906,18 @@ public JSONObject outputPlan(Object work, PrintStream out, if (extended) { invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels()); } else { - invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()); + invokeFlag = + Level.DEFAULT.in(xpl_note.explainLevels()) || + (this.work != null && this.work.isDebug() && Level.DEBUG.in(xpl_note.explainLevels())); } } if (invokeFlag) { Vectorization vectorization = xpl_note.vectorization(); - if (this.work != null && this.work.isVectorization()) { - - // The EXPLAIN VECTORIZATION option was specified. - final boolean desireOnly = this.work.isVectorizationOnly(); - final VectorizationDetailLevel desiredVecDetailLevel = - this.work.isVectorizationDetailLevel(); - - switch (vectorization) { - case NON_VECTORIZED: - // Display all non-vectorized leaf objects unless ONLY. - if (desireOnly) { - invokeFlag = false; - } - break; - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - if (vectorization.rank < desiredVecDetailLevel.rank) { - // This detail not desired. 
- invokeFlag = false; - } - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - if (desireOnly) { - if (vectorization.rank < desiredVecDetailLevel.rank) { - // Suppress headers and all objects below. - invokeFlag = false; - } - } - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); - } - } else { - // Do not display vectorization objects. - switch (vectorization) { - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - invokeFlag = false; - break; - case NON_VECTORIZED: - // No action. - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - // Always include headers since they contain non-vectorized objects, too. - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + if (invokeFlag) { + if (this.work != null && this.work.isVectorization()) { + invokeFlag = isInvokeVectorization(vectorization); + } else { + invokeFlag = isInvokeNonVectorization(vectorization); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java index a914ce3..1aae142 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java @@ -140,7 +140,7 @@ public static int populateJoinKeyValue(List<ExprNodeEvaluator>[] outMap, if (key == (byte) posBigTableAlias) { valueFields.add(null); } else { - valueFields.add(ExprNodeEvaluatorFactory.get(expr, conf)); + valueFields.add(expr == null ? null : ExprNodeEvaluatorFactory.get(expr, conf)); } } outMap[key] = valueFields; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index f45a012..114cea9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -27,6 +27,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.Constants; @@ -41,12 +42,16 @@ import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer; import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities; @@ -66,7 +71,9 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.ReflectionUtil; @@ -74,8 +81,8 @@ import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; - import com.esotericsoftware.kryo.KryoException; +import com.google.common.base.Preconditions; /** * Map side Join operator implementation. @@ -105,6 +112,23 @@ protected HybridHashTableContainer firstSmallTable; // The first small table; // Only this table has spilled big table rows + /* + * FULL OUTER MapJoin members. + */ + protected transient boolean isFullOuterMapJoin; // Are we doing a FULL OUTER MapJoin? + + protected transient int fullOuterBigTableRetainSize; + // The number of Big Table columns being + // retained in the output result for + // FULL OUTER MapJoin. + + /* + * Small Table key match tracking used for FULL OUTER MapJoin. Otherwise, null. + * Since the Small Table hash table can be shared among vertices, we require this non-shared object + * for our vertex (i.e. operator private) key match tracking. + */ + protected transient MatchTracker matchTracker; + protected transient boolean isTestingNoHashTableLoad; // Only used in bucket map join. private transient int numBuckets = -1; @@ -177,6 +201,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { hybridMapJoinLeftover = false; firstSmallTable = null; + doFullOuterMapJoinInit(); + generateMapMetaData(); isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf, @@ -252,6 +278,24 @@ protected void completeInitializationOp(Object[] os) throws HiveException { } } + /* + * Do initialization for FULL OUTER MapJoin. + * + * Currently, we do not support FULL OUTER MapJoin for N-way. + */ + private void doFullOuterMapJoinInit() { + + // This will be set during the first process call or during closeOp if no rows processed. + matchTracker = null; + + isFullOuterMapJoin = (condn.length == 1 && condn[0].getType() == JoinDesc.FULL_OUTER_JOIN); + if (isFullOuterMapJoin) { + fullOuterBigTableRetainSize = conf.getRetainList().get(posBigTable).size(); + } else { + fullOuterBigTableRetainSize = 0; + } + } + @VisibleForTesting public void setTestMapJoinTableContainer(int posSmallTable, MapJoinTableContainer testMapJoinTableContainer, @@ -415,6 +459,16 @@ public void cleanUpInputFileChangedOp() throws HiveException { return dest.setFromRow(row, joinKeys[alias], joinKeysObjectInspectors[alias]); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + protected JoinUtil.JoinResult setMapJoinKeyNoNulls( + ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker) + throws HiveException { + return dest.setFromRowNoNulls(row, joinKeys[alias], joinKeysObjectInspectors[alias], matchTracker); + } + protected MapJoinKey getRefKey(byte alias) { // We assume that since we are joining on the same key, all tables would have either // optimized or non-optimized key; hence, we can pass any key in any table as reference.
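For context on the process() and closeOp() hunks that follow: the patch's FULL OUTER MapJoin works in two phases. In process(), each Big Table key that probes the Small Table hash map records its hit in the operator-private MatchTracker (keys containing NULLs are deliberately not tracked, since NULL never equals NULL in join semantics). In closeOp(), the Small Table entries the tracker never saw are emitted as NULL-extended result rows. A minimal standalone sketch of that idea, with java.util.HashMap and HashSet standing in for the patch's MapJoinTableContainer and MatchTracker (class and variable names here are illustrative, not the patch's API):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    public class FullOuterMapJoinSketch {
      public static void main(String[] args) {
        // Small Table (build side): key -> value rows.
        Map<Long, List<String>> smallTable = new HashMap<>();
        smallTable.put(1L, Arrays.asList("a"));
        smallTable.put(2L, Arrays.asList("b"));

        // Stand-in for MatchTracker: which Small Table keys were matched.
        Set<Long> matchTracker = new HashSet<>();

        // process(): stream the Big Table (probe side).
        long[] bigTableKeys = {1L, 1L, 3L};
        for (long key : bigTableKeys) {
          List<String> values = smallTable.get(key);
          if (values != null) {
            matchTracker.add(key); // setMapJoinKeyNoNulls would skip NULL keys here.
            for (String v : values) {
              System.out.println(key + "," + v); // matched join row
            }
          } else {
            System.out.println(key + ",NULL"); // Big-Table-only row
          }
        }

        // closeOp(): emit non-matched Small Table rows, Big Table side NULL.
        for (Map.Entry<Long, List<String>> e : smallTable.entrySet()) {
          if (!matchTracker.contains(e.getKey())) {
            for (String v : e.getValue()) {
              System.out.println("NULL," + v);
            }
          }
        }
      }
    }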
@@ -437,6 +491,10 @@ public void process(Object row, int tag) throws HiveException { for (byte pos = 0; pos < order.length; pos++) { if (pos != alias) { hashMapRowGetters[pos] = mapJoinTables[pos].createGetter(refKey); + if (isFullOuterMapJoin) { + matchTracker = + ((ReusableGetAdaptorDirectAccess) hashMapRowGetters[pos]).createMatchTracker(); + } } } } @@ -463,7 +521,13 @@ public void process(Object row, int tag) throws HiveException { ReusableGetAdaptor adaptor; if (firstSetKey == null) { adaptor = firstSetKey = hashMapRowGetters[pos]; - joinResult = setMapJoinKey(firstSetKey, row, alias); + if (!isFullOuterMapJoin) { + // Normal case. + joinResult = setMapJoinKey(firstSetKey, row, alias); + } else { + // FULL OUTER MapJoin. We do not want keys with any NULLs to get tracked. + joinResult = setMapJoinKeyNoNulls(firstSetKey, row, alias, matchTracker); + } } else { // Keys for all tables are the same, so only the first has to deserialize them. adaptor = hashMapRowGetters[pos]; @@ -544,8 +608,139 @@ protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object bigTable.add(row); } + /* + * For FULL OUTER MapJoin, create a key match tracker on the Small Table. + */ + private void createMatchTracker(MapJoinTableContainer smallTable) { + ReusableGetAdaptor hashMapRowGetter = smallTable.createGetter(null); + matchTracker = + ((ReusableGetAdaptorDirectAccess) hashMapRowGetter).createMatchTracker(); + Preconditions.checkState(matchTracker != null); + } + + private byte findSmallTable() { + byte smallTablePos = -1; + for (byte pos = 0; pos < mapJoinTables.length; pos++) { + if (pos != conf.getPosBigTable()) { + smallTablePos = pos; + break; + } + } + Preconditions.checkState(smallTablePos != -1); + return smallTablePos; + } + + /* + * For FULL OUTER MapJoin, find the non-matched Small Table keys and values and add them to the + * join output result. + */ + protected void generateFullOuterSmallTableNoMatches(byte smallTablePos, + MapJoinTableContainer substituteSmallTable) throws HiveException { + + // FUTURE: Currently, in the MapJoinOperator, we only support FULL OUTER MapJoin for + // FUTURE: MapJoinBytesTableContainer. NOTE: Vectorization code will override this method. + + if (matchTracker == null) { + + // When the process method isn't called (i.e. no rows), we need to create the + // MatchTracker here. + // + // It will indicate no matches, of course. + // + createMatchTracker(substituteSmallTable); + } + + boolean isSmallTableValuesOnly = false; + int[] smallTableValuesIndex = conf.getValueIndex(smallTablePos); + if (smallTableValuesIndex == null) { + List<Integer> valuesList = conf.getRetainList().get(smallTablePos); + smallTableValuesIndex = + ArrayUtils.toPrimitive(valuesList.toArray(new Integer[0])); + isSmallTableValuesOnly = true; + } + final int smallTableValuesIndexSize = smallTableValuesIndex.length; + + // Our first output column for Small Table results is based on order. (The Big Table columns + // will all be NULL). + final int firstOutputColumnNum = (posBigTable == (byte) 0 ? fullOuterBigTableRetainSize : 0); + + /* + * Create iterator that produces each non-matched Small Table key and a ReusableRowContainer with
+ */ + NonMatchedSmallTableIterator nonMatchedIterator = + substituteSmallTable.createNonMatchedSmallTableIterator(matchTracker); + int nonMatchedKeyCount = 0; + int nonMatchedValueCount = 0; + while (nonMatchedIterator.isNext()) { + List keyObjList = nonMatchedIterator.getCurrentKey(); + + MapJoinRowContainer values = nonMatchedIterator.getCurrentRows(); + AbstractRowContainer.RowIterator> iter = values.rowIter(); + for (List valueObjList = iter.first(); + valueObjList != null; + valueObjList = iter.next()) { + + // Form non-matched Small Table join result. We only fill in the Small Table columns, + // so the Big Table retained columns are NULLs from the new allocation. + + Object[] row = new Object[fullOuterBigTableRetainSize + smallTableValuesIndexSize]; + int outputColumnNum = firstOutputColumnNum; + + if (isSmallTableValuesOnly) { + for (int i = 0; i < smallTableValuesIndexSize; i++) { + row[outputColumnNum++] = valueObjList.get(smallTableValuesIndex[i]); + } + } else { + for (int i = 0; i < smallTableValuesIndexSize; i++) { + final int index = smallTableValuesIndex[i]; + + if (index >= 0) { + + // Zero and above numbers indicate a big table key is needed for + // small table result "area". + + row[outputColumnNum++] = keyObjList.get(index); + } else { + + // Negative numbers indicate a column to be (deserialize) read from the small table's + // LazyBinary value row. + + int smallTableValueIndex = -index - 1; + + row[outputColumnNum++] = valueObjList.get(smallTableValueIndex); + } + } + } + + Object standardCopyRow = + ObjectInspectorUtils.copyToStandardObject( + row, outputObjInspector, ObjectInspectorCopyOption.WRITABLE); + + // FUTURE: Support residual filters for non-equi joins. + internalForward(standardCopyRow, outputObjInspector); + nonMatchedValueCount++; + } + + nonMatchedKeyCount++; + } + } + @Override public void closeOp(boolean abort) throws HiveException { + + if (isFullOuterMapJoin) { + + // FULL OUTER MapJoin: After matching the Big Table row keys against the Small Table, we now + // add any non matched Small Table key and values to the join output result. + + // FUTURE: Currently, we only support FULL OUTER MapJoin for single condition MapJoins. 
+ byte smallTablePos = findSmallTable(); + generateFullOuterSmallTableNoMatches( + smallTablePos, + (MapJoinTableContainer) mapJoinTables[smallTablePos]); + } + boolean spilled = false; for (MapJoinTableContainer container : mapJoinTables) { if (container != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index 38316bf..e064f34 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -924,51 +924,18 @@ protected long getNextCntr(long cntr) { protected void forward(Object row, ObjectInspector rowInspector) throws HiveException { - forward(row, rowInspector, false); - } - - protected void forward(VectorizedRowBatch vrg, ObjectInspector rowInspector) - throws HiveException { - forward(vrg, rowInspector, true); - } - - protected void forward(Object row, ObjectInspector rowInspector, boolean isVectorized) - throws HiveException { - if (isVectorized) { - vectorForward((VectorizedRowBatch) row, rowInspector); - } else { - baseForward(row, rowInspector); - } - } - - private void vectorForward(VectorizedRowBatch vrg, ObjectInspector rowInspector) - throws HiveException { - this.runTimeNumRows += vrg.count(); + runTimeNumRows++; if (getDone()) { return; } - // Data structures to store original values - final int size = vrg.size; - final boolean selectedInUse = vrg.selectedInUse; - final boolean saveState = (selectedInUse && multiChildren); - if (saveState) { - System.arraycopy(vrg.selected, 0, selected, 0, size); - } - int childrenDone = 0; for (int i = 0; i < childOperatorsArray.length; i++) { Operator o = childOperatorsArray[i]; if (o.getDone()) { childrenDone++; } else { - o.process(vrg, childOperatorsTag[i]); - // Restore original values - vrg.size = size; - vrg.selectedInUse = selectedInUse; - if (saveState) { - System.arraycopy(selected, 0, vrg.selected, 0, size); - } + o.process(row, childOperatorsTag[i]); } } @@ -978,26 +945,49 @@ private void vectorForward(VectorizedRowBatch vrg, ObjectInspector rowInspector) } } - private void baseForward(Object row, ObjectInspector rowInspector) + /* + * Forward a VectorizedRowBatch to the children operators. 
+   */
+  protected void vectorForward(VectorizedRowBatch batch)
       throws HiveException {
-    this.runTimeNumRows++;
+
+    runTimeNumRows++;
     if (getDone()) {
       return;
     }

-    int childrenDone = 0;
-    for (int i = 0; i < childOperatorsArray.length; i++) {
-      Operator<? extends OperatorDesc> o = childOperatorsArray[i];
-      if (o.getDone()) {
-        childrenDone++;
-      } else {
-        o.process(row, childOperatorsTag[i]);
-      }
+    // Data structures to store original values
+    final int size = batch.size;
+    final boolean selectedInUse = batch.selectedInUse;
+    final boolean saveState = (selectedInUse && multiChildren);
+    if (saveState) {
+      System.arraycopy(batch.selected, 0, selected, 0, size);
     }

-    // if all children are done, this operator is also done
-    if (childrenDone != 0 && childrenDone == childOperatorsArray.length) {
-      setDone(true);
+    final int childSize = childOperatorsArray.length;
+    if (childSize == 1) {
+      childOperatorsArray[0].process(batch, childOperatorsTag[0]);
+    } else {
+      int childrenDone = 0;
+      for (int i = 0; i < childOperatorsArray.length; i++) {
+        Operator<? extends OperatorDesc> o = childOperatorsArray[i];
+        if (o.getDone()) {
+          childrenDone++;
+        } else {
+          o.process(batch, childOperatorsTag[i]);
+
+          // Restore original values
+          batch.size = size;
+          batch.selectedInUse = selectedInUse;
+          if (saveState) {
+            System.arraycopy(selected, 0, batch.selected, 0, size);
+          }
+        }
+      }
+      // if all children are done, this operator is also done
+      if (childrenDone != 0 && childrenDone == childOperatorsArray.length) {
+        setDone(true);
+      }
     }
   }

diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index 744cdf5..cd0637a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -122,7 +122,11 @@ public void process(Object row, int tag) throws HiveException {
     if (conf != null && conf.isGatherStats()) {
       gatherStats(row);
     }
-    forward(row, inputObjInspectors[tag], vectorized);
+    if (vectorized) {
+      vectorForward((VectorizedRowBatch) row);
+    } else {
+      forward(row, inputObjInspectors[tag]);
+    }
   }

   private boolean checkSetDone(Object row, int tag) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
index add8bda..a6b0dbc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
@@ -466,16 +466,18 @@ public void put(KvSource kv, int keyHashCode) throws SerDeException {
    * @param key Key buffer.
    * @param offset the offset to the key in the buffer
    * @param hashMapResult The object to fill in that can read the values.
+   * @param matchTracker Optional object for tracking key matches.
    * @return The state byte.
    */
-  public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult) {
+  public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult,
+      MatchTracker matchTracker) {

     hashMapResult.forget();

     WriteBuffers.Position readPos = hashMapResult.getReadPos();

     // First, find first record for the key.
-    long ref = findKeyRefToRead(key, offset, length, readPos);
+    long ref = findKeyRefToRead(key, offset, length, readPos, matchTracker);
     if (ref == 0) {
       return 0;
     }
@@ -500,6 +502,54 @@ public void populateValue(WriteBuffers.ByteSegmentRef valueRef) {
   }

   /**
+   * Finds the next non-matched Small Table key and value.
Supports FULL OUTER MapJoin. + * + * @param currentSlotNum Start by specifying -1; the return index from the previous call. + * @param keyRef If the return value is not -1, a reference to the key bytes. + * @param hashMapResult If the return value is not -1, the key's values. + * @param matchTracker The object that tracks matches (non-shared). + * @return The current index of the non-matched key; or -1 if no more. + */ + public int findNextNonMatched(int currentSlotNum, WriteBuffers.ByteSegmentRef keyRef, + Result hashMapResult, MatchTracker matchTracker) { + currentSlotNum++; + + hashMapResult.forget(); + + WriteBuffers.Position readPos = hashMapResult.getReadPos(); + + while (true) { + if (currentSlotNum >= refs.length) { + + // No more. + return -1; + } + long ref = refs[currentSlotNum]; + if (ref != 0 && !matchTracker.wasMatched(currentSlotNum)) { + + // An unmatched key. + writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, readPos), readPos); + int valueLength = (int) writeBuffers.readVLong(readPos); + int keyLength = (int) writeBuffers.readVLong(readPos); + long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength); + + keyRef.reset(keyOffset, keyLength); + if (keyLength > 0) { + writeBuffers.populateValue(keyRef); + } + + boolean hasList = Ref.hasList(ref); + long offsetAfterListRecordKeyLen = hasList ? writeBuffers.getReadPoint(readPos) : 0; + + hashMapResult.set(this, Ref.getOffset(ref), hasList, offsetAfterListRecordKeyLen); + + return currentSlotNum; + } + currentSlotNum++; + } + } + + /** * Number of keys in the hashmap * @return number of keys */ @@ -516,8 +566,12 @@ public int getNumValues() { return numValues; } + public int getNumHashBuckets() { + return refs.length; + } + /** - * Number of bytes used by the hashmap + * Number of bytes used by the hashmap. * There are two main components that take most memory: writeBuffers and refs * Others include instance fields: 100 * @return number of bytes @@ -614,7 +668,7 @@ private int findKeySlotToWrite(long keyOffset, int keyLength, int hashCode) { * @return The ref to use for reading. */ private long findKeyRefToRead(byte[] key, int offset, int length, - WriteBuffers.Position readPos) { + WriteBuffers.Position readPos, MatchTracker matchTracker) { final int bucketMask = (refs.length - 1); int hashCode = writeBuffers.hashCode(key, offset, length); int slot = hashCode & bucketMask; @@ -629,6 +683,13 @@ private long findKeyRefToRead(byte[] key, int offset, int length, return 0; } if (isSameKey(key, offset, length, ref, hashCode, readPos)) { + + if (matchTracker != null) { + + // Support for FULL OUTER MapJoin. Track matches of the slot table entry. 
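          // [Editor's sketch, not part of this patch] MatchTracker (introduced later in this
          // patch) keeps one bit per hash table slot, so match tracking costs roughly slots/8
          // bytes and no per-probe allocation:
          //
          //   long[] flags = new long[(slotCount + Long.SIZE - 1) / Long.SIZE];
          //   void track(int slot) { flags[slot / Long.SIZE] |= 1L << (slot % Long.SIZE); }
          //   boolean wasMatched(int slot) {
          //     return (flags[slot / Long.SIZE] & (1L << (slot % Long.SIZE))) != 0;
          //   }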
+ matchTracker.trackMatch(slot); + } + return ref; } ++metricGetConflict; @@ -897,7 +958,7 @@ public void debugDumpTable() { dump.append(Utils.toStringBinary(key, 0, key.length)).append(" ref [").append(dumpRef(ref)) .append("]: "); Result hashMapResult = new Result(); - getValueResult(key, 0, key.length, hashMapResult); + getValueResult(key, 0, key.length, hashMapResult, null); List results = new ArrayList(); WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first(); while (byteSegmentRef != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java index 765a647..ae84d2d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase; import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch; @@ -113,6 +114,7 @@ public void put(MapJoinKey key, MapJoinRowContainer value) { public int size() { return mHash.size(); } + @Override public Set> entrySet() { return mHash.entrySet(); @@ -141,6 +143,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + throw new RuntimeException("Not applicable"); + } + + @Override public long getEstimatedMemorySize() { // TODO: Key and Values are Object[] which can be eagerly deserialized or lazily deserialized. 
To accurately // estimate the entry size, every possible Objects in Key, Value should implement MemoryEstimate interface which @@ -188,6 +196,14 @@ public GetAdaptor(MapJoinKey key) { } @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapperBase kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracer) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { if (currentKey == null) { @@ -208,6 +224,12 @@ public GetAdaptor(MapJoinKey key) { } @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) { assert other instanceof GetAdaptor; GetAdaptor other2 = (GetAdaptor)other; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index 13f1702..63efc62 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.rowbytescontainer.VectorRowBytesContainer; import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase; @@ -50,6 +51,7 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory; @@ -63,6 +65,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Preconditions; + import com.esotericsoftware.kryo.Kryo; /** @@ -95,6 +99,9 @@ /** The OI used to deserialize values. We never deserialize keys. */ private LazyBinaryStructObjectInspector internalValueOi; + private MapJoinObjectSerDeContext keyContext; + private MapJoinObjectSerDeContext valueContext; + private AbstractSerDe keySerde; private boolean[] sortableSortOrders; private byte[] nullMarkers; private byte[] notNullMarkers; @@ -776,6 +783,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + throw new RuntimeException("Not applicable"); + } + + @Override public void seal() { for (HashPartition hp : hashPartitions) { // Only seal those partitions that haven't been spilled and cleared, @@ -834,6 +847,18 @@ public GetAdaptor() { sortableSortOrders, nullMarkers, notNullMarkers)); } + /* + * This variation is for FULL OUTER MapJoin. 
It does key match tracking only if the key has + * no NULLs. + */ + @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapperBase kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + throw new RuntimeException("Not supported"); + } + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { @@ -850,6 +875,16 @@ public GetAdaptor() { sortableSortOrders, nullMarkers, notNullMarkers)); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) throws HiveException { assert other instanceof GetAdaptor; @@ -884,8 +919,14 @@ public MapJoinRowContainer getCurrentRows() { @Override public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { - return currentValue.setDirect(bytes, offset, length, hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + return currentValue.setDirect( + bytes, offset, length, hashMapResult, matchTracker); + } + + @Override + public MatchTracker createMatchTracker() { + throw new RuntimeException("Not supported"); } @Override @@ -935,6 +976,10 @@ public ReusableRowContainer() { clearRows(); } + public BytesBytesMultiHashMap.Result getHashMapResult() { + return hashMapResult; + } + /* Determine if there is a match between big table row and the corresponding hashtable * Three states can be returned: * MATCH: a match is found @@ -963,10 +1008,45 @@ public ReusableRowContainer() { toSpillPartitionId = partitionId; hashMapResult.forget(); return JoinUtil.JoinResult.SPILL; - } - else { + } else { aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(output.getData(), 0, - output.getLength(), hashMapResult); + output.getLength(), hashMapResult, /* matchTracker */ null); + dummyRow = null; + if (hashMapResult.hasRows()) { + return JoinUtil.JoinResult.MATCH; + } else { + aliasFilter = (byte) 0xff; + return JoinUtil.JoinResult.NOMATCH; + } + } + } + + public JoinUtil.JoinResult setFromOutput(Output output, MatchTracker matchTracker) { + int keyHash = HashCodeUtil.murmurHash(output.getData(), 0, output.getLength()); + + if (bloom1 != null && !bloom1.testLong(keyHash)) { + /* + * if the keyHash is missing in the bloom filter, then the value cannot + * exist in any of the spilled partition - return NOMATCH + */ + dummyRow = null; + aliasFilter = (byte) 0xff; + hashMapResult.forget(); + return JoinResult.NOMATCH; + } + + partitionId = keyHash & (hashPartitions.length - 1); + + // If the target hash table is on disk, spill this row to disk as well to be processed later + if (isOnDisk(partitionId)) { + toSpillPartitionId = partitionId; + hashMapResult.forget(); + return JoinUtil.JoinResult.SPILL; + } else { + aliasFilter = hashPartitions[partitionId].hashMap.getValueResult( + output.getData(), 0, output.getLength(), + hashMapResult, + /* matchTracker */ null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -977,6 +1057,10 @@ public ReusableRowContainer() { } } + public void reset() { + hashMapResult.forget(); + } + @Override public boolean 
hasRows() { return hashMapResult.hasRows() || (dummyRow != null); @@ -1094,7 +1178,7 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out // Direct access. public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { int keyHash = HashCodeUtil.murmurHash(bytes, offset, length); partitionId = keyHash & (hashPartitions.length - 1); @@ -1115,8 +1199,10 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out return JoinUtil.JoinResult.SPILL; } else { - aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(bytes, offset, length, - hashMapResult); + aliasFilter = hashPartitions[partitionId].hashMap.getValueResult( + bytes, offset, length, + hashMapResult, + /* matchTracker */ null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -1168,10 +1254,27 @@ public int size() { return totalSize; } + public MapJoinObjectSerDeContext getKeyContext() { + return keyContext; + } + + public MapJoinObjectSerDeContext getValueContext() { + return valueContext; + } + @Override public void setSerde(MapJoinObjectSerDeContext keyCtx, MapJoinObjectSerDeContext valCtx) throws SerDeException { - AbstractSerDe keySerde = keyCtx.getSerDe(), valSerde = valCtx.getSerDe(); + + // Save the key and value contexts in case the MapJoinOperator needs them when creating + // a stand alone spill MapJoinBytesTableContainer. + keyContext = keyCtx; + valueContext = valCtx; + + // Save the key serde for possible use by NonMatchedSmallTableIteratorImpl. + keySerde = keyCtx.getSerDe(); + + AbstractSerDe valSerde = valCtx.getSerDe(); if (writeHelper == null) { LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java index b632e1d..0e4b8df 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase; import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch; @@ -40,6 +41,7 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; @@ -65,6 +67,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Preconditions; + /** * Table container that serializes keys and values using LazyBinarySerDe into * BytesBytesMultiHashMap, with very low memory overhead. 
However, @@ -88,6 +92,7 @@ * compare the large table keys correctly when we do, we need to serialize them with correct * ordering. Hence, remember the ordering here; it is null if we do use LazyBinarySerDe. */ + private AbstractSerDe keySerde; private boolean[] sortableSortOrders; private byte[] nullMarkers; private byte[] notNullMarkers; @@ -336,9 +341,17 @@ public void setKeyValue(Writable key, Writable val) { @Override public byte updateStateByte(Byte previousValue) { - if (filterGetter == null) return (byte)0xff; + if (!hasTag || filterGetter == null) { + return (byte) 0xff; + } byte aliasFilter = (previousValue == null) ? (byte)0xff : previousValue.byteValue(); - filterGetter.init((BinaryComparable)value); + BinaryComparable binaryComparableValue = (BinaryComparable) value; + if (binaryComparableValue.getLength() == 0) { + + // Skip empty values just like MapJoinEagerRowContainer.read does. + return (byte) 0xff; + } + filterGetter.init(binaryComparableValue); aliasFilter &= filterGetter.getShort(); return aliasFilter; } @@ -407,7 +420,8 @@ public long getEstimatedMemorySize() { @Override public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeContext valueContext) throws SerDeException { - AbstractSerDe keySerde = keyContext.getSerDe(), valSerde = valueContext.getSerDe(); + keySerde = keyContext.getSerDe(); + AbstractSerDe valSerde = valueContext.getSerDe(); if (writeHelper == null) { LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " + valSerde.getClass().getName()); @@ -456,6 +470,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + return new NonMatchedSmallTableIteratorImpl(matchTracker); + } + + @Override public void seal() { hashMap.seal(); } @@ -541,6 +561,44 @@ public GetAdaptor() { sortableSortOrders, nullMarkers, notNullMarkers)); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapperBase kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + if (nulls == null) { + nulls = new boolean[keyOutputWriters.length]; + currentKey = new Object[keyOutputWriters.length]; + vectorKeyOIs = new ArrayList(); + for (int i = 0; i < keyOutputWriters.length; i++) { + vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector()); + } + } else { + assert nulls.length == keyOutputWriters.length; + } + boolean hasNulls = false; + for (int i = 0; i < keyOutputWriters.length; i++) { + currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]); + if (currentKey[i] == null) { + nulls[i] = true; + hasNulls = true; + } else { + nulls[i] = false; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { @@ -557,6 +615,36 @@ public GetAdaptor() { sortableSortOrders, nullMarkers, notNullMarkers)); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. 
+ */ + @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + if (nulls == null) { + nulls = new boolean[fields.size()]; + currentKey = new Object[fields.size()]; + } + boolean hasNulls = false; + for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) { + currentKey[keyIndex] = fields.get(keyIndex).evaluate(row); + if (currentKey[keyIndex] == null) { + nulls[keyIndex] = true; + hasNulls = true; + } else { + nulls[keyIndex] = false; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) { assert other instanceof GetAdaptor; @@ -591,8 +679,14 @@ public MapJoinRowContainer getCurrentRows() { @Override public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { - return currentValue.setDirect(bytes, offset, length, hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + return currentValue.setDirect( + bytes, offset, length, hashMapResult, matchTracker); + } + + @Override + public MatchTracker createMatchTracker() { + return MatchTracker.create(hashMap.getNumHashBuckets()); } @Override @@ -619,6 +713,7 @@ public int directSpillPartitionId() { private final LazyBinaryStruct valueStruct; private final boolean needsComplexObjectFixup; private final ArrayList complexObjectArrayBuffer; + private final WriteBuffers.Position noResultReadPos; public ReusableRowContainer() { if (internalValueOi != null) { @@ -639,13 +734,18 @@ public ReusableRowContainer() { } uselessIndirection = new ByteArrayRef(); hashMapResult = new BytesBytesMultiHashMap.Result(); + noResultReadPos = new WriteBuffers.Position(); clearRows(); } + public BytesBytesMultiHashMap.Result getHashMapResult() { + return hashMapResult; + } + public JoinUtil.JoinResult setFromOutput(Output output) { aliasFilter = hashMap.getValueResult( - output.getData(), 0, output.getLength(), hashMapResult); + output.getData(), 0, output.getLength(), hashMapResult, /* matchTracker */ null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -653,8 +753,24 @@ public ReusableRowContainer() { aliasFilter = (byte) 0xff; return JoinUtil.JoinResult.NOMATCH; } + } - } + public JoinUtil.JoinResult setFromOutput(Output output, MatchTracker matchTracker) { + + aliasFilter = hashMap.getValueResult( + output.getData(), 0, output.getLength(), hashMapResult, matchTracker); + dummyRow = null; + if (hashMapResult.hasRows()) { + return JoinUtil.JoinResult.MATCH; + } else { + aliasFilter = (byte) 0xff; + return JoinUtil.JoinResult.NOMATCH; + } + } + + public void reset() { + hashMapResult.forget(); + } @Override public boolean hasRows() { @@ -773,8 +889,8 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out // Direct access. 
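  // [Editor's sketch, not part of this patch] The NoNulls variants above short-circuit because a
  // NULL join key compares equal to nothing in SQL, so a NULL-keyed Big Table row must never mark
  // a Small Table slot as matched.  Conceptually (hypothetical probeAndTrack() helper):
  //
  //   if (Arrays.stream(keyFields).anyMatch(Objects::isNull)) {
  //     return JoinUtil.JoinResult.NOMATCH;          // do not touch the MatchTracker
  //   }
  //   return probeAndTrack(serializedKey, matchTracker);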
  public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length,
-      BytesBytesMultiHashMap.Result hashMapResult) {
-    aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult);
+      BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) {
+    aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult, matchTracker);
     dummyRow = null;
     if (hashMapResult.hasRows()) {
       return JoinUtil.JoinResult.MATCH;
@@ -785,6 +901,71 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out
     }
   }

+  /**
+   * For FULL OUTER MapJoin: Iterates through the Small Table hash table and returns the key and
+   * value rows for any non-matched keys.
+   */
+  private class NonMatchedSmallTableIteratorImpl implements NonMatchedSmallTableIterator {
+
+    private final MatchTracker matchTracker;
+
+    private int currentIndex;
+
+    private final WriteBuffers.ByteSegmentRef keyRef;
+    private final BytesWritable bytesWritable;
+    private final ReusableRowContainer currentValue;
+
+    NonMatchedSmallTableIteratorImpl(MatchTracker matchTracker) {
+      this.matchTracker = matchTracker;
+
+      Preconditions.checkState(keySerde != null);
+
+      currentIndex = -1;
+
+      keyRef = new WriteBuffers.ByteSegmentRef();
+      bytesWritable = new BytesWritable();
+
+      currentValue = new ReusableRowContainer();
+    }
+
+    @Override
+    public boolean isNext() {
+
+      // If another non-matched key is found, the key bytes will be referenced by keyRef, and
+      // our ReusableRowContainer's BytesBytesMultiHashMap.Result will reference the value rows.
+      currentIndex =
+          hashMap.findNextNonMatched(
+              currentIndex, keyRef, currentValue.getHashMapResult(), matchTracker);
+      return (currentIndex != -1);
+    }
+
+    @Override
+    public List<Object> getCurrentKey() throws HiveException {
+      List<Object> deserializedList =
+          MapJoinKey.deserializeRow(
+              keyRef.getBytes(),
+              (int) keyRef.getOffset(),
+              keyRef.getLength(),
+              bytesWritable, keySerde);
+      return deserializedList;
+    }
+
+    @Override
+    public ByteSegmentRef getCurrentKeyAsRef() {
+      return keyRef;
+    }
+
+    @Override
+    public MapJoinRowContainer getCurrentRows() {
+      return currentValue;
+    }
+
+    @Override
+    public BytesBytesMultiHashMap.Result getHashMapResult() {
+      return currentValue.getHashMapResult();
+    }
+  }
+
   public static boolean isSupportedKey(ObjectInspector keyOi) {
     List<? extends StructField> keyFields = ((StructObjectInspector)keyOi).getAllStructFieldRefs();
     for (StructField field : keyFields) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
index 2e3716c..6639536 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java
@@ -40,6 +40,7 @@
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Writable;

 /**
@@ -171,4 +172,19 @@ public static Output serializeRow(Output byteStream, Object[] fieldData,
     }
     return byteStream;
   }
+
+  /**
+   * Deserializes a key.  Needed for FULL OUTER MapJoin to unpack the Small Table key when
+   * adding the non-matched key to the join output result.
+   * @param bytesWritable BytesWritable to reuse.
+ */ + public static List deserializeRow(byte[] keyBytes, int keyOffset, int keyLength, + BytesWritable bytesWritable, AbstractSerDe serde) throws HiveException { + try { + bytesWritable.set(keyBytes, keyOffset, keyLength); + return (List) serde.deserialize(bytesWritable); + } catch (SerDeException e) { + throw new HiveException("Serialization error", e); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java index 345d1f4..1d7aec8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java @@ -17,11 +17,16 @@ */ package org.apache.hadoop.hive.ql.exec.persistence; +import java.util.ArrayList; +import java.util.List; + import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; @SuppressWarnings("deprecation") public class MapJoinObjectSerDeContext { @@ -55,6 +60,18 @@ public boolean hasFilterTag() { return hasFilter; } + public String stringify() { + StandardStructObjectInspector standardStructOI = (StandardStructObjectInspector) standardOI; + List structFields = standardStructOI.getAllStructFieldRefs(); + List typeInfoStrings = new ArrayList(); + for (StructField field : structFields) { + ObjectInspector fieldOI = field.getFieldObjectInspector(); + typeInfoStrings.add(fieldOI.getTypeName()); + } + return "[types " + typeInfoStrings.toString() + ", serde=" + serde.getClass().getName() + + ", hasFilter=" + hasFilter + "]"; + } + @Override public String toString() { return "MapJoinObjectSerDeContext [standardOI=" + standardOI + ", serde=" + serde diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java index 2c4229f..74e0b12 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; @@ -43,9 +44,17 @@ * Changes current rows to which adaptor is referring to the rows corresponding to * the key represented by a VHKW object, and writers and batch used to interpret it. */ + JoinUtil.JoinResult setFromVector(VectorHashKeyWrapperBase kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch) throws HiveException; + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. 
+   */
+  JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapperBase kw, VectorExpressionWriter[] keyOutputWriters,
+      VectorHashKeyWrapperBatch keyWrapperBatch, MatchTracker matchTracker) throws HiveException;
+
   /**
    * Changes current rows to which adaptor is referring to the rows corresponding to
    * the key represented by a row object, and fields and ois used to interpret it.
@@ -53,6 +62,14 @@
   JoinUtil.JoinResult setFromRow(Object row, List<ExprNodeEvaluator> fields, List<ObjectInspector> ois)
       throws HiveException;

+  /*
+   * This variation is for FULL OUTER MapJoin.  It does key match tracking only if the key has
+   * no NULLs.
+   */
+  JoinUtil.JoinResult setFromRowNoNulls(Object row, List<ExprNodeEvaluator> fields,
+      List<ObjectInspector> ois, MatchTracker matchTracker)
+      throws HiveException;
+
   /**
    * Changes current rows to which adaptor is referring to the rows corresponding to
    * the key that another adaptor has already deserialized via setFromVector/setFromRow.
@@ -82,6 +99,42 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue)
       throws SerDeException, HiveException, IOException;

   /**
+   * For FULL OUTER MapJoin: Iterates through the Small Table hash table and returns the key and
+   * value rows for any non-matched keys.
+   */
+  public interface NonMatchedSmallTableIterator {
+
+    /**
+     * Return true if another non-matched key was found.
+     */
+    boolean isNext();
+
+    /**
+     * @return The current key as a deserialized object array after a successful isNext() call
+     *         that returns true.
+     * @throws HiveException
+     */
+    List<Object> getCurrentKey() throws HiveException;
+
+    /**
+     * @return The current key as a WriteBuffers.ByteSegmentRef after a successful isNext() call
+     *         that returns true.
+     */
+    ByteSegmentRef getCurrentKeyAsRef();
+
+    /**
+     * @return The container with the value rows for the current key after a successful isNext()
+     *         call that returns true.
+     */
+    MapJoinRowContainer getCurrentRows();
+
+    /**
+     * @return The value rows as a BytesBytesMultiHashMap.Result.
+     */
+    BytesBytesMultiHashMap.Result getHashMapResult();
+  }
+
+  /**
    * Indicates to the container that the puts have ended; table is now r/o.
    */
   void seal();
@@ -94,6 +147,12 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue)
    */
   ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader);

+  /**
+   * Creates an iterator for going through the hash table, returning the key and value rows for
+   * any non-matched keys.  Supports FULL OUTER MapJoin.
+   */
+  NonMatchedSmallTableIterator createNonMatchedSmallTableIterator(MatchTracker matchTracker);
+
   /** Clears the contents of the table. */
   void clear();

@@ -108,7 +167,7 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue)
   boolean hasSpill();

   /**
-   * Return the size of the hash table
+   * Return the size of the hash table.
    */
   int size();

diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java
new file mode 100644
index 0000000..c33416e
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.persistence; + +import com.google.common.base.Preconditions; + +/** + * Record which hash table slot entries had key matches for FULL OUTER MapJoin. + * Supports partitioned match trackers for HybridHashTableContainer. + */ +public final class MatchTracker { + + /* + * Regular case: + * isPartitioned = false + * The longMatchFlags array: one bit per hash table slot entry. + * If this tracker is underneath a partitioned tracker, then partitionParent is set. + * + * Partitioned case: + * isPartitioned = true + * The partitions array: a tracker for the currently active partitions. + */ + private final boolean isPartitioned; + private final MatchTracker partitionParent; + private final long[] longMatchFlags; + private final MatchTracker[] partitions; + + private MatchTracker(boolean isPartitioned, MatchTracker partitionParent, int count) { + this.isPartitioned = isPartitioned; + this.partitionParent = partitionParent; + if (!isPartitioned) { + final int longMatchFlagsSize = (count + Long.SIZE - 1) / Long.SIZE; + longMatchFlags = new long[longMatchFlagsSize]; + partitions = null; + } else { + longMatchFlags = null; + partitions = new MatchTracker[count]; + } + } + + /* + * Create a regular tracker. + */ + public static MatchTracker create(int logicalHashBucketCount) { + return new MatchTracker(false, null, logicalHashBucketCount); + } + + /* + * Create a partitioned tracker. Use addPartition and clearPartition to maintain the currently + * active partition trackers. + */ + public static MatchTracker createPartitioned(int partitionCount) { + return new MatchTracker(true, null, partitionCount); + } + + public boolean getIsPartitioned() { + return isPartitioned; + } + + public void addPartition(int partitionId, int logicalHashBucketCount) { + partitions[partitionId] = new MatchTracker(false, this, logicalHashBucketCount); + } + + public void clearPartition(int partitionId) { + partitions[partitionId] = null; + } + + public MatchTracker getPartition(int partitionId) { + return partitions[partitionId]; + } + + private boolean isFirstMatch; + + public boolean getIsFirstMatch() { + return isFirstMatch; + } + + /* + * Track a regular hash table slot match. + * If this tracker is underneath a partitioned tracker, the partitioned tracker's first-match + * flag will be updated. + */ + public void trackMatch(int logicalSlotNum) { + + Preconditions.checkState(!isPartitioned); + + final int longWordIndex = logicalSlotNum / Long.SIZE; + final long longBitMask = 1L << (logicalSlotNum % Long.SIZE); + if ((longMatchFlags[longWordIndex] & longBitMask) != 0) { + + // Flag is already on. + isFirstMatch = false; + } else { + longMatchFlags[longWordIndex] |= longBitMask; + isFirstMatch = true; + } + if (partitionParent != null) { + + // Push match flag up. + partitionParent.isFirstMatch = isFirstMatch; + } + } + + /* + * Track a partitioned hash table slot match. + */ + public void trackPartitionMatch(int partitionId, int logicalSlotNum) { + partitions[partitionId].trackMatch(logicalSlotNum); + } + + /* + * Was a regular hash table slot matched? 
+ */ + public boolean wasMatched(int logicalSlotNum) { + final int longWordIndex = logicalSlotNum / Long.SIZE; + final long longBitMask = 1L << (logicalSlotNum % Long.SIZE); + return (longMatchFlags[longWordIndex] & longBitMask) != 0; + } + + /* + * Was a partitioned hash table slot matched? + */ + public boolean wasPartitionMatched(int partitionId, int logicalSlotNum) { + return partitions[partitionId].wasMatched(logicalSlotNum); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java index 3303cc4..84ffaec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java @@ -20,11 +20,14 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.serde2.WriteBuffers; public interface ReusableGetAdaptorDirectAccess { JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker); int directSpillPartitionId(); + + MatchTracker createMatchTracker(); } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java index 95400c8..0ff54ff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; /** @@ -70,6 +71,8 @@ public MapJoinRowContainer setInternal(MapJoinRowContainer internal, Object[] cu return unwrap(iterator.next()); } + private static final ShortWritable ALL_ALIAS_FILTER_SHORT_WRITABLE = new ShortWritable((byte) 0xff); + private List unwrap(List values) { if (values == null) { return null; @@ -90,7 +93,14 @@ public MapJoinRowContainer setInternal(MapJoinRowContainer internal, Object[] cu } } if (tagged) { - unwrapped.add(values.get(values.size() - 1)); // append filter tag + + // Append filter tag. 
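      // [Editor's note, not part of this patch] (byte) 0xff is the same permissive default used
      // elsewhere in this patch (updateStateByte starts from 0xff and ANDs in each row's stored
      // tag).  The guard below keeps an empty value list from indexing values.get(-1):
      //
      //   short combined = (short) 0xff;                        // neutral starting tag
      //   for (short rowTag : rowTags) { combined &= rowTag; }  // updateStateByte-style fold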
+ final int size = values.size(); + if (size == 0) { + unwrapped.add(ALL_ALIAS_FILTER_SHORT_WRITABLE); + } else { + unwrapped.add(values.get(size - 1)); + } } return unwrapped; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index 2cccb44..152dc98 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -262,7 +262,7 @@ public DynamicValueRegistryTez call() { e.getMessage()); throw (InterruptedException) e; } else { - throw new RuntimeException("Reduce operator initialization failed", e); + throw new RuntimeException(redWork.getName() + " operator initialization failed", e); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 70d6468..5698639 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -451,6 +451,11 @@ private void processVectorGroup(BytesWritable keyWritable, } reducer.process(batch, tag); + // Do the non-column batch reset logic. + batch.selectedInUse = false; + batch.size = 0; + batch.endOfFile = false; + // Reset just the value columns and value buffer. for (int i = firstValueColumnOffset; i < batch.numCols; i++) { // Note that reset also resets the data buffer for bytes column vectors. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java index c4503ad..f2400b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java @@ -136,7 +136,7 @@ public void process(Object data, int tag) throws HiveException { throw new HiveException(e); } - forward(data, rowInspector, true); + forward(data, rowInspector); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index fc675c5..19a3eed 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -995,6 +996,17 @@ public void assignRow(VectorizedRowBatch batch, int batchIndex, Object[] objects } } + public void assignRow(VectorizedRowBatch batch, int batchIndex, ArrayList objectList) { + final int count = isConvert.length; + for (int i = 0; i < count; i++) { + if (isConvert[i]) { + assignConvertRowColumn(batch, batchIndex, i, objectList.get(i)); + } else { + assignRowColumn(batch, batchIndex, i, objectList.get(i)); + } + } + } + /* * Assign a row from a list of standard objects up to a count */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index bedc12a..0cf8491 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -259,14 +259,27 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBa 
private CopyRow[] subRowToBatchCopiersByReference; public void init(VectorColumnMapping columnMapping) throws HiveException { - int count = columnMapping.getCount(); + init( + columnMapping.getInputColumns(), + columnMapping.getOutputColumns(), + columnMapping.getTypeInfos()); + } + + public void init(int[] columnMap, TypeInfo[] typeInfos) throws HiveException { + init(columnMap, columnMap, typeInfos); + } + + public void init(int[] inputColumnMap, int[] outputColumnMap, TypeInfo[] typeInfos) + throws HiveException { + + final int count = inputColumnMap.length; subRowToBatchCopiersByValue = new CopyRow[count]; subRowToBatchCopiersByReference = new CopyRow[count]; for (int i = 0; i < count; i++) { - int inputColumn = columnMapping.getInputColumns()[i]; - int outputColumn = columnMapping.getOutputColumns()[i]; - TypeInfo typeInfo = columnMapping.getTypeInfos()[i]; + int inputColumn = inputColumnMap[i]; + int outputColumn = outputColumnMap[i]; + TypeInfo typeInfo = typeInfos[i]; Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); CopyRow copyRowByValue = null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 55f3556..97166ec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -444,6 +444,38 @@ public void init(boolean[] columnsToIncludeTruncated) throws HiveException { } + public void init(int[] outputColumns, boolean[] columnsToInclude) throws HiveException { + + Preconditions.checkState( + outputColumns.length == columnsToInclude.length); + + final int columnCount = sourceTypeInfos.length; + allocateArrays(columnCount); + + int includedCount = 0; + final int[] includedIndices = new int[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + if (!columnsToInclude[i]) { + + // Field not included in query. + + } else { + + initTopLevelField(i, outputColumns[i], sourceTypeInfos[i], dataTypePhysicalVariations[i]); + includedIndices[includedCount++] = i; + } + } + + // Optimizing for readField? 
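    // [Editor's sketch, not part of this patch] With this init() overload, a caller maps each
    // included source field to an explicit batch column; hypothetical arguments shown (source
    // field 0 lands in batch column 5, field 1 is skipped, field 2 lands in batch column 7;
    // the two arrays must have the same length):
    //
    //   VectorDeserializeRow<LazyBinaryDeserializeRead> deserializer = ...;
    //   deserializer.init(new int[] {5, 0, 7}, new boolean[] {true, false, true});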
+ if (includedCount < columnCount && deserializeRead.isReadFieldSupported()) { + useReadField = true; + readFieldLogicalIndices = Arrays.copyOf(includedIndices, includedCount); + } + + } + /** * Initialize for converting the source data type that are going to be read with the * DeserializedRead interface passed to the constructor to the target data types desired in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java index 14ac8ee..73965ad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java @@ -133,7 +133,7 @@ public void process(Object row, int tag) throws HiveException { // All are selected, do nothing } if (vrg.size > 0) { - forward(vrg, null, true); + vectorForward(vrg); } // Restore the original selected vector diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 7816cbb..a516d60 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -1173,7 +1173,7 @@ private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buff } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java index 051d338..7edb059 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java @@ -88,7 +88,7 @@ public void process(Object row, int tag) throws HiveException { batch.selected[i] = batch.selected[skipSize + i]; } } - forward(row, inputObjInspectors[tag], true); + vectorForward(batch); currCount += batch.size; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java index 497b12d..127a223 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -25,6 +26,7 @@ import java.util.Map; import java.util.concurrent.Future; +import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -34,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -132,7 +135,8 @@ public VectorizationContext getInputVectorizationContext() { int[] smallTableIndices; int smallTableIndicesSize; List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); - if 
(desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { + if (desc.getValueIndices() != null && + desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); smallTableIndicesSize = smallTableIndices.length; } else { @@ -141,7 +145,8 @@ public VectorizationContext getInputVectorizationContext() { } List smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); - final int smallTableRetainSize = smallTableRetainList.size(); + final int smallTableRetainSize = + (smallTableRetainList != null ? smallTableRetainList.size() : 0); int smallTableResultSize = 0; if (smallTableIndicesSize > 0) { @@ -216,6 +221,7 @@ public VectorizationContext getInputVectorizationContext() { return outputTypeInfos; } + @Override public void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); @@ -234,7 +240,6 @@ public void initializeOp(Configuration hconf) throws HiveException { */ @Override protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException { - Object[] values = (Object[]) row; VectorAssignRow va = outputVectorAssignRowMap.get(outputOI); if (va == null) { va = new VectorAssignRow(); @@ -242,7 +247,11 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive outputVectorAssignRowMap.put(outputOI, va); } - va.assignRow(outputBatch, outputBatch.size, values); + if (row instanceof ArrayList) { + va.assignRow(outputBatch, outputBatch.size, (ArrayList) row); + } else { + va.assignRow(outputBatch, outputBatch.size, (Object[]) row); + } ++outputBatch.size; if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { @@ -251,7 +260,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } @@ -263,8 +272,10 @@ public void closeOp(boolean aborted) throws HiveException { tableContainer.dumpMetrics(); } } - if (!aborted && 0 < outputBatch.size) { - flushOutput(); + if (!aborted) { + if (outputBatch.size > 0) { + flushOutput(); + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index 2d8e1d7..5051ad9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -19,8 +19,13 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -28,6 +33,7 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -183,11 +189,6 @@ protected 
Object _evaluate(Object row, int version) throws HiveException { } // Now replace the old evaluators with our own joinValues[posBigTable] = vectorNodeEvaluators; - - // Filtering is handled in the input batch processing - if (filterMaps != null) { - filterMaps[posBigTable] = null; - } } @Override @@ -196,6 +197,18 @@ protected Object _evaluate(Object row, int version) throws HiveException { return dest.setFromVector(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + @Override + protected JoinUtil.JoinResult setMapJoinKeyNoNulls(ReusableGetAdaptor dest, Object row, byte alias, + MatchTracker matchTracker) + throws HiveException { + return dest.setFromVectorNoNulls(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch, + matchTracker); + } + @Override public void process(Object row, int tag) throws HiveException { @@ -242,6 +255,11 @@ public void process(Object row, int tag) throws HiveException { } @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + } + + @Override protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row) throws HiveException { // Extract the actual row from row batch diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index 07a6e9d..267d0dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -326,7 +326,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index 22d2f34..2f296c9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -136,7 +136,7 @@ public void process(Object row, int tag) throws HiveException { // Just forward the row as is if (conf.isSelStarNoCompute()) { - forward(row, inputObjInspectors[tag], true); + vectorForward((VectorizedRowBatch) row); return; } @@ -155,7 +155,7 @@ public void process(Object row, int tag) throws HiveException { int originalProjectionSize = vrg.projectionSize; vrg.projectionSize = projectedOutputColumns.length; vrg.projectedColumns = this.projectedOutputColumns; - forward(vrg, outputObjInspector, true); + vectorForward((VectorizedRowBatch) row); // Revert the projected columns back, because vrg will be re-used. 
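    // [Editor's note, not part of this patch] This save/mutate/forward/restore pattern recurs in
    // the vectorized operators because a single VectorizedRowBatch instance is shared down the
    // pipeline; whatever a parent mutates (projection, size, selected) must be put back before
    // the batch is reused:
    //
    //   int savedSize = batch.projectionSize;
    //   int[] savedColumns = batch.projectedColumns;
    //   try {
    //     batch.projectedColumns = myProjection;
    //     batch.projectionSize = myProjection.length;
    //     child.process(batch, tag);
    //   } finally {
    //     batch.projectedColumns = savedColumns;   // leave the batch as we found it
    //     batch.projectionSize = savedSize;
    //   }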
vrg.projectionSize = originalProjectionSize; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java index e28afee..a211308 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java @@ -128,7 +128,7 @@ public void process(Object data, int tag) throws HiveException { // Forward the result if (size > 0) { - forward(batch, null, true); + vectorForward(batch); } // Restore the original selected vector diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index c832cdb..5733688 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -21,7 +21,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; @@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorCopyRow; import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; @@ -55,14 +54,17 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -124,6 +126,10 @@ protected void initLoggingPrefix(String className) { // a mixture of input big table columns and new scratch columns. protected VectorizationContext vOutContext; + protected VectorMapJoinVariation vectorMapJoinVariation; + protected HashTableKind hashTableKind; + protected HashTableKeyType hashTableKeyType; + // The output column projection of the vectorized row batch. And, the type infos of the output // columns. 
protected int[] outputProjection; @@ -149,28 +155,70 @@ protected void initLoggingPrefix(String className) { protected String[] bigTableValueColumnNames; protected TypeInfo[] bigTableValueTypeInfos; - // This is a mapping of which big table columns (input and key/value expressions) will be - // part of the big table portion of the join output result. - protected VectorColumnOutputMapping bigTableRetainedMapping; + /* + * NOTE: + * The Big Table key columns are from the key expressions. + * The Big Table value columns are from the getExpr(posBigTable) expressions. + * Any calculations needed for those will be scratch columns. + * + * The Small Table key and value output columns are scratch columns. + * + * Big Table Retain Column Map / TypeInfos: + * Any Big Table Batch columns that will be in the output result. + * 0, 1, or more Column Nums and TypeInfos + * + * Non Outer Small Table Key Mapping: + * For non-[FULL] OUTER MapJoin, when Big Table key columns are not retained for the output + * result but are needed for the Small Table output result, they are put in this mapping + * as they are required for copying rows to the overflow batch. + * + * Outer Small Table Key Mapping: + * For [FULL] OUTER MapJoin, the mapping for any Small Table key columns needed for the + * output result from the Big Table key columns. The Big Table keys cannot be projected since + * on NOMATCH there must be a physical column present to hold the non-match NULL. + * + * Full Outer Small Table Key Mapping: + * For FULL OUTER MapJoin, the mapping from any needed Small Table key columns to their area + * in the output result. + * + * For deserializing a FULL OUTER non-match Small Table key into the output result. + * Can be partial or empty if some or all Small Table key columns are not retained. + * + * Small Table Value Mapping: + * The mapping from Small Table value columns to their area in the output result. + * + * For deserializing Small Table values into the output result. + * + * It is the Small Table value index to output column numbers and TypeInfos. + * That is, a mapping of the LazyBinary field order to output batch scratch columns for the + * small table portion. + * Or, to use the output column nums for OUTER Small Table value NULLs. + * + */ + protected int[] bigTableRetainColumnMap; + protected TypeInfo[] bigTableRetainTypeInfos; + + protected int[] nonOuterSmallTableKeyColumnMap; + protected TypeInfo[] nonOuterSmallTableKeyTypeInfos; - // This is a mapping of which keys will be copied from the big table (input and key expressions) - // to the small table result portion of the output for outer join. - protected VectorColumnOutputMapping bigTableOuterKeyMapping; + protected VectorColumnOutputMapping outerSmallTableKeyMapping; - // This is a mapping of the values in the small table hash table that will be copied to the - // small table result portion of the output. That is, a mapping of the LazyBinary field order - // to output batch scratch columns for the small table portion. - protected VectorColumnSourceMapping smallTableMapping; + protected VectorColumnSourceMapping fullOuterSmallTableKeyMapping; + protected VectorColumnSourceMapping smallTableValueMapping; + + // The MapJoin output result projection for both the Big Table input batch and the overflow batch. protected VectorColumnSourceMapping projectionMapping; // These are the output columns for the small table and the outer small table keys.
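For orientation, here is a minimal, self-contained sketch of how these maps partition the output batch; the example query and the column numbers are hypothetical, not produced by this patch:

import java.util.Arrays;

public class MapJoinColumnLayoutSketch {
  public static void main(String[] args) {
    // Hypothetical plan for:
    //   SELECT b.key, b.val, s.key, s.val
    //   FROM big b FULL OUTER JOIN small s ON b.key = s.key
    // Batch columns 0..1 arrive from the Big Table; 2..3 are scratch columns.
    int[] bigTableRetainColumnMap = {0, 1};      // b.key, b.val stay where they are

    // s.key needs its own physical scratch column (2): on a NOMATCH it must
    // hold NULL, so the Big Table key column cannot simply be projected.
    int[] outerSmallTableKeyOutputColumns = {2};

    int[] smallTableValueOutputColumns = {3};    // s.val is deserialized into column 3

    int[] outputProjection = {0, 1, 2, 3};       // used for big table and overflow batches

    System.out.println("bigTableRetainColumnMap    " + Arrays.toString(bigTableRetainColumnMap));
    System.out.println("outerSmallTableKey columns " + Arrays.toString(outerSmallTableKeyOutputColumns));
    System.out.println("smallTableValue columns    " + Arrays.toString(smallTableValueOutputColumns));
    System.out.println("outputProjection           " + Arrays.toString(outputProjection));
  }
}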
- protected int[] smallTableOutputVectorColumns; - protected int[] bigTableOuterKeyOutputVectorColumns; + protected int[] outerSmallTableKeyColumnMap; + protected int[] smallTableValueColumnMap; // These are the columns in the big and small table that are ByteColumnVector columns. // We create data buffers for these columns so we can copy strings into those columns by value. protected int[] bigTableByteColumnVectorColumns; + protected int[] nonOuterSmallTableKeyByteColumnVectorColumns; + protected int[] outerSmallTableKeyByteColumnVectorColumns; protected int[] smallTableByteColumnVectorColumns; // The above members are initialized by the constructor and must not be @@ -186,13 +234,22 @@ protected void initLoggingPrefix(String className) { // portion of the join output. protected transient VectorCopyRow bigTableRetainedVectorCopy; + // This helper object deserializes BinarySortable format small table keys into columns of a row + // in a vectorized row batch. + protected int[] allSmallTableKeyColumnNums; + protected boolean[] allSmallTableKeyColumnIncluded; + protected transient VectorDeserializeRow smallTableKeyOuterVectorDeserializeRow; + + protected transient VectorCopyRow nonOuterSmallTableKeyVectorCopy; + + // UNDONE // A helper object that efficiently copies the big table key columns (input or key expressions) - // that appear in the small table portion of the join output for outer joins. - protected transient VectorCopyRow bigTableVectorCopyOuterKeys; + // that appear in the small table portion of the join output. + protected transient VectorCopyRow outerSmallTableKeyVectorCopy; // This helper object deserializes LazyBinary format small table values into columns of a row // in a vectorized row batch. - protected transient VectorDeserializeRow smallTableVectorDeserializeRow; + protected transient VectorDeserializeRow smallTableValueVectorDeserializeRow; // This a 2nd batch with the same "column schema" as the big table batch that can be used to // build join output results in. If we can create some join output results in the big table @@ -207,6 +264,9 @@ protected void initLoggingPrefix(String className) { // Whether the native vectorized map join operator has performed its common setup. protected transient boolean needCommonSetup; + // Whether the native vectorized map join operator has performed its first batch setup. + protected transient boolean needFirstBatchSetup; + // Whether the native vectorized map join operator has performed its // native vector map join hash table setup. protected transient boolean needHashTableSetup; @@ -214,6 +274,9 @@ protected void initLoggingPrefix(String className) { // The small table hash table for the native vectorized map join operator. protected transient VectorMapJoinHashTable vectorMapJoinHashTable; + protected transient long batchCounter; + protected transient long rowCounter; + /** Kryo ctor. */ protected VectorMapJoinCommonOperator() { super(); @@ -246,9 +309,9 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? 
order[1] : order[0]); isOuterJoin = !desc.getNoOuterJoin(); - Map> filterExpressions = desc.getFilters(); - bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), - VectorExpressionDescriptor.Mode.FILTER); + vectorMapJoinVariation = this.vectorDesc.getVectorMapJoinVariation(); + hashTableKind = this.vectorDesc.getHashTableKind(); + hashTableKeyType = this.vectorDesc.getHashTableKeyType(); bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames(); @@ -260,11 +323,19 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos(); bigTableValueExpressions = vectorMapJoinInfo.getSlimmedBigTableValueExpressions(); - bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping(); + bigTableFilterExpressions = vectorMapJoinInfo.getBigTableFilterExpressions(); + + bigTableRetainColumnMap = vectorMapJoinInfo.getBigTableRetainColumnMap(); + bigTableRetainTypeInfos = vectorMapJoinInfo.getBigTableRetainTypeInfos(); + + nonOuterSmallTableKeyColumnMap = vectorMapJoinInfo.getNonOuterSmallTableKeyColumnMap(); + nonOuterSmallTableKeyTypeInfos = vectorMapJoinInfo.getNonOuterSmallTableKeyTypeInfos(); + + outerSmallTableKeyMapping = vectorMapJoinInfo.getOuterSmallTableKeyMapping(); - bigTableOuterKeyMapping = vectorMapJoinInfo.getBigTableOuterKeyMapping(); + fullOuterSmallTableKeyMapping = vectorMapJoinInfo.getFullOuterSmallTableKeyMapping(); - smallTableMapping = vectorMapJoinInfo.getSmallTableMapping(); + smallTableValueMapping = vectorMapJoinInfo.getSmallTableValueMapping(); projectionMapping = vectorMapJoinInfo.getProjectionMapping(); @@ -273,47 +344,96 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, protected void determineCommonInfo(boolean isOuter) throws HiveException { - bigTableOuterKeyOutputVectorColumns = bigTableOuterKeyMapping.getOutputColumns(); - smallTableOutputVectorColumns = smallTableMapping.getOutputColumns(); + outerSmallTableKeyColumnMap = outerSmallTableKeyMapping.getOutputColumns(); + + smallTableValueColumnMap = smallTableValueMapping.getOutputColumns(); // Which big table and small table columns are ByteColumnVector and need have their data buffer // to be manually reset for some join result processing? 
- bigTableByteColumnVectorColumns = getByteColumnVectorColumns(bigTableOuterKeyMapping); + bigTableByteColumnVectorColumns = + getByteColumnVectorColumns(bigTableRetainColumnMap, bigTableRetainTypeInfos); + + nonOuterSmallTableKeyByteColumnVectorColumns = + getByteColumnVectorColumns(nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos); + + outerSmallTableKeyByteColumnVectorColumns = + getByteColumnVectorColumns(outerSmallTableKeyMapping); - smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping); + smallTableByteColumnVectorColumns = + getByteColumnVectorColumns(smallTableValueMapping); outputProjection = projectionMapping.getOutputColumns(); outputTypeInfos = projectionMapping.getTypeInfos(); - if (LOG.isDebugEnabled()) { + if (LOG.isInfoEnabled()) { int[] orderDisplayable = new int[order.length]; for (int i = 0; i < order.length; i++) { orderDisplayable[i] = (int) order[i]; } - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + Arrays.toString(bigTableValueTypeInfos)); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); + LOG.info(getLoggingPrefix() + " order " + + Arrays.toString(orderDisplayable)); + LOG.info(getLoggingPrefix() + " posBigTable " + + (int) posBigTable); + LOG.info(getLoggingPrefix() + " posSingleVectorMapJoinSmallTable " + + (int) posSingleVectorMapJoinSmallTable); + + LOG.info(getLoggingPrefix() + " bigTableKeyColumnMap " + + Arrays.toString(bigTableKeyColumnMap)); + LOG.info(getLoggingPrefix() + " bigTableKeyColumnNames " + + Arrays.toString(bigTableKeyColumnNames)); + LOG.info(getLoggingPrefix() + " bigTableKeyTypeInfos " + + Arrays.toString(bigTableKeyTypeInfos)); + + LOG.info(getLoggingPrefix() + " 
bigTableValueColumnMap " + + Arrays.toString(bigTableValueColumnMap)); + LOG.info(getLoggingPrefix() + " bigTableValueColumnNames " + + Arrays.toString(bigTableValueColumnNames)); + LOG.info(getLoggingPrefix() + " bigTableValueTypeInfos " + + Arrays.toString(bigTableValueTypeInfos)); + + LOG.info(getLoggingPrefix() + " bigTableRetainColumnMap " + + Arrays.toString(bigTableRetainColumnMap)); + LOG.info(getLoggingPrefix() + " bigTableRetainTypeInfos " + + Arrays.toString(bigTableRetainTypeInfos)); + + LOG.info(getLoggingPrefix() + " nonOuterSmallTableKeyColumnMap " + + Arrays.toString(nonOuterSmallTableKeyColumnMap)); + LOG.info(getLoggingPrefix() + " nonOuterSmallTableKeyTypeInfos " + + Arrays.toString(nonOuterSmallTableKeyTypeInfos)); + + LOG.info(getLoggingPrefix() + " outerSmallTableKeyMapping " + + outerSmallTableKeyMapping.toString()); + + LOG.info(getLoggingPrefix() + " fullOuterSmallTableKeyMapping " + + fullOuterSmallTableKeyMapping.toString()); + + LOG.info(getLoggingPrefix() + " smallTableValueMapping " + + smallTableValueMapping.toString()); + + LOG.info(getLoggingPrefix() + " bigTableByteColumnVectorColumns " + + Arrays.toString(bigTableByteColumnVectorColumns)); + LOG.info(getLoggingPrefix() + " smallTableByteColumnVectorColumns " + + Arrays.toString(smallTableByteColumnVectorColumns)); + + LOG.info(getLoggingPrefix() + " outputProjection " + + Arrays.toString(outputProjection)); + LOG.info(getLoggingPrefix() + " outputTypeInfos " + + Arrays.toString(outputTypeInfos)); + + LOG.info(getLoggingPrefix() + " mapJoinDesc.getKeysString " + + conf.getKeysString()); + if (conf.getValueIndices() != null) { + for (Entry entry : conf.getValueIndices().entrySet()) { + LOG.info(getLoggingPrefix() + " mapJoinDesc.getValueIndices +" + + (int) entry.getKey() + " " + Arrays.toString(entry.getValue())); + } + } + LOG.info(getLoggingPrefix() + " mapJoinDesc.getExprs " + + conf.getExprs().toString()); + LOG.info(getLoggingPrefix() + " mapJoinDesc.getRetainList " + + conf.getRetainList().toString()); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); } setupVOutContext(conf.getOutputColumnNames()); @@ -323,11 +443,14 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { * Determine from a mapping which columns are BytesColumnVector columns. */ private int[] getByteColumnVectorColumns(VectorColumnMapping mapping) { + return getByteColumnVectorColumns(mapping.getOutputColumns(), mapping.getTypeInfos()); + } + + private int[] getByteColumnVectorColumns(int[] outputColumns, TypeInfo[] typeInfos) { + + // Search mapping for any strings and return their output columns.
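A toy rendering of that search, using plain type-name strings in place of Hive's TypeInfo (the real code calls typeInfos[i].getTypeName(), and char/varchar names there carry length parameters that this sketch ignores):

import java.util.ArrayList;
import java.util.Arrays;

public class ByteColumnScanSketch {
  static int[] byteColumns(int[] outputColumns, String[] typeNames) {
    ArrayList<Integer> list = new ArrayList<>();
    for (int i = 0; i < outputColumns.length; i++) {
      switch (typeNames[i]) {
        case "string": case "char": case "varchar": case "binary":
          list.add(outputColumns[i]);   // backed by a BytesColumnVector
          break;
        default:
          break;                        // long/double/decimal/etc. vectors
      }
    }
    return list.stream().mapToInt(Integer::intValue).toArray();
  }

  public static void main(String[] args) {
    int[] cols = byteColumns(new int[] {4, 5, 6},
        new String[] {"bigint", "string", "varchar"});
    System.out.println(Arrays.toString(cols));  // [5, 6]
  }
}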
ArrayList list = new ArrayList(); - int count = mapping.getCount(); - int[] outputColumns = mapping.getOutputColumns(); - TypeInfo[] typeInfos = mapping.getTypeInfos(); + final int count = outputColumns.length; for (int i = 0; i < count; i++) { int outputColumn = outputColumns[i]; String typeName = typeInfos[i].getTypeName(); @@ -345,10 +468,12 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { */ protected void setupVOutContext(List outputColumnNames) { if (LOG.isDebugEnabled()) { - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); + LOG.debug(getLoggingPrefix() + " outputColumnNames " + outputColumnNames); } if (outputColumnNames.size() != outputProjection.length) { - throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeInfos) + " length mismatch"); + throw new RuntimeException("Output column names " + outputColumnNames + + " length and output projection " + Arrays.toString(outputProjection) + + " / " + Arrays.toString(outputTypeInfos) + " length mismatch"); } vOutContext.resetProjectionColumns(); for (int i = 0; i < outputColumnNames.size(); ++i) { @@ -357,7 +482,8 @@ protected void setupVOutContext(List outputColumnNames) { vOutContext.addProjectionColumn(columnName, outputColumn); if (LOG.isDebugEnabled()) { - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor addProjectionColumn " + i + " columnName " + columnName + " outputColumn " + outputColumn); + LOG.debug(getLoggingPrefix() + " addProjectionColumn " + i + " columnName " + columnName + + " outputColumn " + outputColumn); } } } @@ -386,9 +512,50 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { return hashTableLoader; } + /* + * Do FULL OUTER MapJoin operator initialization. + */ + private void initializeFullOuterObjects() throws HiveException { + + // The Small Table key type info is the same as Big Table's. + TypeInfo[] smallTableKeyTypeInfos = bigTableKeyTypeInfos; + final int allKeysSize = smallTableKeyTypeInfos.length; + + /* + * The VectorMapJoinFullOuter{Long|MultiKey|String}Operator outputs 0, 1, or more + * Small Key columns in the join result.
+ */ + allSmallTableKeyColumnNums = new int[allKeysSize]; + Arrays.fill(allSmallTableKeyColumnNums, -1); + allSmallTableKeyColumnIncluded = new boolean[allKeysSize]; + + final int outputKeysSize = fullOuterSmallTableKeyMapping.getCount(); + int[] outputKeyNums = fullOuterSmallTableKeyMapping.getInputColumns(); + int[] outputKeyOutputColumns = fullOuterSmallTableKeyMapping.getOutputColumns(); + for (int i = 0; i < outputKeysSize; i++) { + final int outputKeyNum = outputKeyNums[i]; + allSmallTableKeyColumnNums[outputKeyNum] = outputKeyOutputColumns[i]; + allSmallTableKeyColumnIncluded[outputKeyNum] = true; + } + + if (hashTableKeyType == HashTableKeyType.MULTI_KEY && + outputKeysSize > 0) { + + smallTableKeyOuterVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + smallTableKeyTypeInfos, + /* useExternalBuffer */ true)); + smallTableKeyOuterVectorDeserializeRow.init( + allSmallTableKeyColumnNums, allSmallTableKeyColumnIncluded); + } + } + @Override protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + VectorExpression.doTransientInit(bigTableFilterExpressions); VectorExpression.doTransientInit(bigTableKeyExpressions); VectorExpression.doTransientInit(bigTableValueExpressions); @@ -405,23 +572,34 @@ protected void initializeOp(Configuration hconf) throws HiveException { /* * Create our vectorized copy row and deserialize row helper objects. */ - if (smallTableMapping.getCount() > 0) { - smallTableVectorDeserializeRow = + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + initializeFullOuterObjects(); + } + + if (smallTableValueMapping.getCount() > 0) { + smallTableValueVectorDeserializeRow = new VectorDeserializeRow( new LazyBinaryDeserializeRead( - smallTableMapping.getTypeInfos(), + smallTableValueMapping.getTypeInfos(), /* useExternalBuffer */ true)); - smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns()); + smallTableValueVectorDeserializeRow.init(smallTableValueMapping.getOutputColumns()); } - if (bigTableRetainedMapping.getCount() > 0) { + if (bigTableRetainColumnMap.length > 0) { bigTableRetainedVectorCopy = new VectorCopyRow(); - bigTableRetainedVectorCopy.init(bigTableRetainedMapping); + bigTableRetainedVectorCopy.init( + bigTableRetainColumnMap, bigTableRetainTypeInfos); } - if (bigTableOuterKeyMapping.getCount() > 0) { - bigTableVectorCopyOuterKeys = new VectorCopyRow(); - bigTableVectorCopyOuterKeys.init(bigTableOuterKeyMapping); + if (nonOuterSmallTableKeyColumnMap.length > 0) { + nonOuterSmallTableKeyVectorCopy = new VectorCopyRow(); + nonOuterSmallTableKeyVectorCopy.init( + nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos); + } + + if (outerSmallTableKeyMapping.getCount() > 0) { + outerSmallTableKeyVectorCopy = new VectorCopyRow(); + outerSmallTableKeyVectorCopy.init(outerSmallTableKeyMapping); } /* @@ -430,6 +608,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { overflowBatch = setupOverflowBatch(); needCommonSetup = true; + needFirstBatchSetup = true; needHashTableSetup = true; if (LOG.isDebugEnabled()) { @@ -553,29 +732,46 @@ private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, } /* - * Common one time setup by native vectorized map join operator's processOp. + * Common one time setup for Native Vector MapJoin operator. 
*/ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { + protected void commonSetup() throws HiveException { - if (LOG.isDebugEnabled()) { - LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin..."); - displayBatchColumns(batch, "batch"); - displayBatchColumns(overflowBatch, "overflowBatch"); + /* + * Make sure big table BytesColumnVectors have room for string values in the overflow batch... + */ + for (int column: bigTableByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); } - // Make sure big table BytesColumnVectors have room for string values in the overflow batch... - for (int column: bigTableByteColumnVectorColumns) { + for (int column : nonOuterSmallTableKeyByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); + } + + for (int column : outerSmallTableKeyByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; bytesColumnVector.initBuffer(); } + for (int column: smallTableByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); + } + + batchCounter = 0; + rowCounter = 0; + } + + /* + * Common one time setup by native vectorized map join operator's first batch. + */ + public void firstBatchSetup(VectorizedRowBatch batch) throws HiveException { // Make sure small table BytesColumnVectors have room for string values in the big table and // overflow batchs... for (int column: smallTableByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[column]; bytesColumnVector.initBuffer(); - bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; - bytesColumnVector.initBuffer(); } // Setup a scratch batch that will be used to play back big table rows that were spilled @@ -583,6 +779,67 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { spillReplayBatch = VectorizedBatchUtil.makeLike(batch); } + /* + * Perform any Native Vector MapJoin operator specific hash table setup. + */ + public void hashTableSetup() throws HiveException { + } + + /* + * Perform the Native Vector MapJoin operator work. + */ + public abstract void processBatch(VectorizedRowBatch batch) throws HiveException; + + /* + * Common process method for all Native Vector MapJoin operators. + * + * Do common initialization work and invoke the override-able common setup methods. + * + * Then, invoke the processBatch override method to do the operator work. + */ + @Override + public void process(Object row, int tag) throws HiveException { + + VectorizedRowBatch batch = (VectorizedRowBatch) row; + alias = (byte) tag; + + if (needCommonSetup) { + + // Our one time process method initialization. + commonSetup(); + + needCommonSetup = false; + } + + if (needFirstBatchSetup) { + + // Our one time first-batch method initialization. + firstBatchSetup(batch); + + needFirstBatchSetup = false; + } + + if (needHashTableSetup) { + + // Setup our hash table specialization. It will be the first time the process + // method is called, or after a Hybrid Grace reload. 
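The initBuffer() calls above exist so that strings copied by value land in a buffer the column itself owns. A small standalone demonstration, assuming only the hive-storage-api BytesColumnVector class on the classpath:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

public class InitBufferSketch {
  public static void main(String[] args) {
    BytesColumnVector col = new BytesColumnVector(1024);
    col.initBuffer();                       // allocate the shared byte buffer once

    byte[] src = "hello".getBytes(StandardCharsets.UTF_8);
    col.setVal(0, src, 0, src.length);      // copies the bytes into the column's buffer

    src[0] = 'X';                           // simulate the source batch being reused
    System.out.println(
        new String(col.vector[0], col.start[0], col.length[0], StandardCharsets.UTF_8));
    // prints "hello": the by-value copy is unaffected by source reuse
  }
}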
+ + hashTableSetup(); + + needHashTableSetup = false; + } + + batchCounter++; + + if (batch.size == 0) { + return; + } + + rowCounter += batch.size; + + processBatch(batch); + } + protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) { LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column count " + batch.numCols); for (int column = 0; column < batch.numCols; column++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java new file mode 100644 index 0000000..944ebb8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Specialized class for doing a Native Vector FULL OUTER MapJoin on a Single-Column Long + * using a hash map. + */ +public class VectorMapJoinFullOuterLongOperator extends VectorMapJoinOuterLongOperator { + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterLongOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterLongOperator() { + super(); + } + + public VectorMapJoinFullOuterLongOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterLongOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching. 
+ fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java new file mode 100644 index 0000000..8e1b206 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Specialized class for doing a Native Vector FULL OUTER MapJoin on a Multi-Key + * using a hash map. + */ +public class VectorMapJoinFullOuterMultiKeyOperator extends VectorMapJoinOuterMultiKeyOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterMultiKeyOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterMultiKeyOperator() { + super(); + } + + public VectorMapJoinFullOuterMultiKeyOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterMultiKeyOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java new file mode 100644 index 0000000..540c721 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Specialized class for doing a Native Vector FULL OUTER MapJoin on a Single-Column String + * using a hash map. + */ +public class VectorMapJoinFullOuterStringOperator extends VectorMapJoinOuterStringOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterStringOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterStringOperator() { + super(); + } + + public VectorMapJoinFullOuterStringOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterStringOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 3821cc6..f5bb547 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; @@ -93,9 +92,6 @@ private transient Thread ownThread; private transient int interruptCheckCounter = CHECK_INTERRUPT_PER_OVERFLOW_BATCHES; - // Debug display.
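All three FULL OUTER subclasses just enable key match tracking in hashTableSetup(). A simplified sketch of that tracking contract (hypothetical classes, not the patch's MatchTracker API): during the probe phase, matched small-table keys are flagged, NULL big-table keys are never flagged since NULL compares equal to nothing in SQL, and close-time processing replays the unflagged entries with NULLs on the big-table side:

import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;

public class FullOuterMatchTrackerSketch {
  private final Map<Long, Integer> smallTableSlots = new HashMap<>();
  private final BitSet matched = new BitSet();

  void load(long[] smallTableKeys) {
    for (long key : smallTableKeys) {
      smallTableSlots.putIfAbsent(key, smallTableSlots.size());
    }
  }

  // Probe with a big-table key; track the match only for non-NULL keys.
  boolean probe(Long bigTableKey) {
    if (bigTableKey == null) {
      return false;               // NULL key: never matches, never tracked
    }
    Integer slot = smallTableSlots.get(bigTableKey);
    if (slot == null) {
      return false;               // no match; the big-table side is still forwarded with NULLs
    }
    matched.set(slot);            // remember this small-table key was matched
    return true;
  }

  // At operator close: emit small-table keys that never matched.
  void emitNonMatches() {
    smallTableSlots.forEach((key, slot) -> {
      if (!matched.get(slot)) {
        System.out.println("non-match small-table key " + key + " -> big side NULL");
      }
    });
  }

  public static void main(String[] args) {
    FullOuterMatchTrackerSketch t = new FullOuterMatchTrackerSketch();
    t.load(new long[] {1L, 2L, 3L});
    t.probe(2L);
    t.probe(null);                // NULL big-table key: forwarded as non-match, not tracked
    t.emitNonMatches();           // prints keys 1 and 3
  }
}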
- protected transient long batchCounter; - /** Kryo ctor. */ protected VectorMapJoinGenerateResultOperator() { super(); @@ -124,13 +120,6 @@ private void setUpInterruptChecking() { ownThread = Thread.currentThread(); } - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); - - batchCounter = 0; - - } - //------------------------------------------------------------------------------------------------ protected void performValueExpressions(VectorizedRowBatch batch, @@ -157,24 +146,24 @@ protected void performValueExpressions(VectorizedRowBatch batch, batch.selectedInUse = saveSelectedInUse; } - protected void doSmallTableDeserializeRow(VectorizedRowBatch batch, int batchIndex, + protected void doSmallTableValueDeserializeRow(VectorizedRowBatch batch, int batchIndex, ByteSegmentRef byteSegmentRef, VectorMapJoinHashMapResult hashMapResult) throws HiveException { byte[] bytes = byteSegmentRef.getBytes(); int offset = (int) byteSegmentRef.getOffset(); int length = byteSegmentRef.getLength(); - smallTableVectorDeserializeRow.setBytes(bytes, offset, length); + smallTableValueVectorDeserializeRow.setBytes(bytes, offset, length); try { // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. - smallTableVectorDeserializeRow.deserializeByRef(batch, batchIndex); + smallTableValueVectorDeserializeRow.deserializeByRef(batch, batchIndex); } catch (Exception e) { throw new HiveException( "\nHashMapResult detail: " + hashMapResult.getDetailedHashMapResultPositionString() + "\nDeserializeRead detail: " + - smallTableVectorDeserializeRow.getDetailedReadPositionString(), + smallTableValueVectorDeserializeRow.getDetailedReadPositionString(), e); } } @@ -215,22 +204,23 @@ protected int generateHashMapResultSingleValue(VectorizedRowBatch batch, for (int i = 0; i < duplicateCount; i++) { - int batchIndex = allMatchs[allMatchesIndex + i]; + final int batchIndex = allMatchs[allMatchesIndex + i]; - // Outer key copying is only used when we are using the input BigTable batch as the output. - // - if (bigTableVectorCopyOuterKeys != null) { - // Copy within row. - bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex); + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area within + // same batch by reference. + // + outerSmallTableKeyVectorCopy.copyByReference( + batch, batchIndex, + batch, batchIndex); } - if (smallTableVectorDeserializeRow != null) { - doSmallTableDeserializeRow(batch, batchIndex, + if (smallTableValueVectorDeserializeRow != null) { + doSmallTableValueDeserializeRow(batch, batchIndex, byteSegmentRef, hashMapResult); } - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, "generateHashMapResultSingleValue big table"); - // Use the big table row as output. batch.selected[numSel++] = batchIndex; } @@ -273,26 +263,45 @@ protected void generateHashMapResultMultiValue(VectorizedRowBatch batch, for (int i = 0; i < duplicateCount; i++) { - int batchIndex = allMatchs[allMatchesIndex + i]; + final int batchIndex = allMatchs[allMatchesIndex + i]; ByteSegmentRef byteSegmentRef = hashMapResult.first(); while (byteSegmentRef != null) { // Copy the BigTable values into the overflow batch. Since the overflow batch may // not get flushed here, we must copy by value. - // Note this includes any outer join keys that need to go into the small table "area". 
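The by-reference/by-value distinction used in these result generators follows the lifetime of the destination: results built inside the same input batch may alias its storage, but rows copied into the longer-lived overflow batch must own their bytes, because the input batch is recycled on every call. A toy example with plain byte arrays (no ColumnVectors):

public class CopySemanticsSketch {
  public static void main(String[] args) {
    byte[][] inputBatchCol = { "k1".getBytes(), "k2".getBytes() };

    // Same-batch result: aliasing is safe, source and result die together.
    byte[] byReference = inputBatchCol[0];

    // Overflow-batch result: copy by value, it may be flushed much later.
    byte[] byValue = inputBatchCol[0].clone();

    inputBatchCol[0][0] = 'X';   // simulate the input batch being reused
    System.out.println(new String(byReference)); // "X1" - follows the reuse
    System.out.println(new String(byValue));     // "k1" - still correct
  }
}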
+ // if (bigTableRetainedVectorCopy != null) { - bigTableRetainedVectorCopy.copyByValue(batch, batchIndex, - overflowBatch, overflowBatch.size); + bigTableRetainedVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); } - if (smallTableVectorDeserializeRow != null) { + if (nonOuterSmallTableKeyVectorCopy != null) { - doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, - byteSegmentRef, hashMapResult); + // For non-[FULL] OUTER MapJoin, copy non-retained Big Table keys to the Big Table area + // across to overflow batch by value so Small Key projection will see its keys... + // + nonOuterSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); } - // VectorizedBatchUtil.debugDisplayOneRow(overflowBatch, overflowBatch.size, "generateHashMapResultMultiValue overflow"); + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area across + // to overflow batch by value. + // + outerSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } overflowBatch.size++; if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { @@ -333,8 +342,8 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, // Fill up as much of the overflow batch as possible with small table values. while (byteSegmentRef != null) { - if (smallTableVectorDeserializeRow != null) { - doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, + if (smallTableValueVectorDeserializeRow != null) { + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult); } @@ -361,9 +370,40 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, int batchIndex = allMatchs[allMatchesIndex + i]; if (bigTableRetainedVectorCopy != null) { + // The one big table row's values repeat. - bigTableRetainedVectorCopy.copyByReference(batch, batchIndex, overflowBatch, 0); - for (int column : bigTableRetainedMapping.getOutputColumns()) { + bigTableRetainedVectorCopy.copyByReference( + batch, batchIndex, + overflowBatch, 0); + for (int column : bigTableRetainColumnMap) { + overflowBatch.cols[column].isRepeating = true; + } + } + + if (nonOuterSmallTableKeyVectorCopy != null) { + + // For non-[FULL] OUTER MapJoin, copy non-retained Big Table keys to the Big Table area + // across to overflow batch by value so Small Key projection will see its keys... + // + nonOuterSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, 0); + for (int column : nonOuterSmallTableKeyColumnMap) { + overflowBatch.cols[column].isRepeating = true; + } + } + + int[] outerSmallTableKeyColumnMap = null; + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area across + // to overflow batch by value. + // + outerSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, 0); + outerSmallTableKeyColumnMap = outerSmallTableKeyMapping.getOutputColumns(); + for (int column : outerSmallTableKeyColumnMap) { overflowBatch.cols[column].isRepeating = true; } } @@ -373,10 +413,20 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, forwardOverflowNoReset(); // Hand reset the big table columns.
- for (int column : bigTableRetainedMapping.getOutputColumns()) { + for (int column : bigTableRetainColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.reset(); + } + for (int column : nonOuterSmallTableKeyColumnMap) { ColumnVector colVector = overflowBatch.cols[column]; colVector.reset(); } + if (outerSmallTableKeyColumnMap != null) { + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.reset(); + } + } } byteSegmentRef = hashMapResult.next(); @@ -476,22 +526,16 @@ private void setupSpillSerDe(VectorizedRowBatch batch) throws HiveException { } private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, - VectorMapJoinHashTableResult hashTableResult) throws IOException { - - int partitionId = hashTableResult.spillPartitionId(); + int partitionId) throws IOException { HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable]; HashPartition hp = ht.getHashPartitions()[partitionId]; VectorRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); -// int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); bigTableVectorSerializeRow.serializeWrite(batch, batchIndex); -// int length = output.getLength() - offset; rowBytesContainer.finishRow(); - -// LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); } protected void spillHashMapBatch(VectorizedRowBatch batch, @@ -509,8 +553,18 @@ protected void spillHashMapBatch(VectorizedRowBatch batch, int hashTableResultIndex = spillHashTableResultIndices[i]; VectorMapJoinHashTableResult hashTableResult = hashTableResults[hashTableResultIndex]; - spillSerializeRow(batch, batchIndex, hashTableResult); + spillSerializeRow(batch, batchIndex, hashTableResult.spillPartitionId()); + } + } + + protected void spillRow(VectorizedRowBatch batch, int batchIndex, int partitionId) + throws HiveException, IOException { + + if (bigTableVectorSerializeRow == null) { + setupSpillSerDe(batch); } + + spillSerializeRow(batch, batchIndex, partitionId); } protected void spillBatchRepeated(VectorizedRowBatch batch, @@ -525,7 +579,7 @@ protected void spillBatchRepeated(VectorizedRowBatch batch, for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - spillSerializeRow(batch, batchIndex, hashTableResult); + spillSerializeRow(batch, batchIndex, hashTableResult.spillPartitionId()); } } @@ -541,8 +595,8 @@ protected void reloadHashTable(byte pos, int partitionId) MapJoinTableContainer smallTable = spilledMapJoinTables[pos]; - vectorMapJoinHashTable = VectorMapJoinOptimizedCreateHashTable.createHashTable(conf, - smallTable); + vectorMapJoinHashTable = + VectorMapJoinOptimizedCreateHashTable.createHashTable(conf, smallTable); needHashTableSetup = true; LOG.info("Created " + vectorMapJoinHashTable.getClass().getSimpleName() + " from " + this.getClass().getSimpleName()); @@ -637,7 +691,7 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException batch.projectionSize = outputProjection.length; batch.projectedColumns = outputProjection; - forward(batch, null, true); + vectorForward(batch); // Revert the projected columns back, because batch can be re-used by our parent operators. 
batch.projectionSize = originalProjectionSize; @@ -649,7 +703,7 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException * Forward the overflow batch and reset the batch. */ protected void forwardOverflow() throws HiveException { - forward(overflowBatch, null, true); + vectorForward(overflowBatch); overflowBatch.reset(); maybeCheckInterrupt(); } @@ -666,7 +720,7 @@ private void maybeCheckInterrupt() throws HiveException { * Forward the overflow batch, but do not reset the batch. */ private void forwardOverflowNoReset() throws HiveException { - forward(overflowBatch, null, true); + vectorForward(overflowBatch); } /* @@ -679,6 +733,11 @@ private void forwardOverflowNoReset() throws HiveException { @Override public void closeOp(boolean aborted) throws HiveException { super.closeOp(aborted); + + // NOTE: The closeOp call on super MapJoinOperator can trigger Hybrid Grace additional + // NOTE: processing and also FULL OUTER MapJoin non-match Small Table result generation. So, + // NOTE: we flush the overflowBatch after the call. + // if (!aborted && overflowBatch.size > 0) { forwardOverflow(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java index f791d95..35ddddd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java @@ -103,25 +103,25 @@ public VectorMapJoinInnerBigOnlyGenerateResultOperator(CompilationOpContext ctx, /* * Setup our inner big table only join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Inner big-table only join specific. 
VectorMapJoinHashMultiSet baseHashMultiSet = (VectorMapJoinHashMultiSet) vectorMapJoinHashTable; - hashMultiSetResults = new VectorMapJoinHashMultiSetResult[batch.DEFAULT_SIZE]; + hashMultiSetResults = new VectorMapJoinHashMultiSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMultiSetResults.length; i++) { hashMultiSetResults[i] = baseHashMultiSet.createHashMultiSetResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesValueCounts = new long[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesValueCounts = new long[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } //----------------------------------------------------------------------------------------------- diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java index 678fa42..30a19b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java @@ -102,45 +102,36 @@ public VectorMapJoinInnerBigOnlyLongOperator(CompilationOpContext ctx, OperatorD // @Override - public void process(Object row, int tag) throws HiveException { + protected void commonSetup() throws HiveException { + super.commonSetup(); - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ + /* + * Initialize Single-Column Long members for this specialized class. + */ - singleJoinColumn = bigTableKeyColumnMap[0]; - - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. - - /* - * Get our Single-Column Long hash multi-set information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable; - useMinMax = hashMultiSet.useMinMax(); - if (useMinMax) { - min = hashMultiSet.min(); - max = hashMultiSet.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash multi-set information for this specialized class. + */ + + hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable; + useMinMax = hashMultiSet.useMinMax(); + if (useMinMax) { + min = hashMultiSet.min(); + max = hashMultiSet.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
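The same refactor is applied to each specialized operator below: the monolithic process() gives way to commonSetup()/hashTableSetup()/processBatch() hooks driven by the base class. A condensed, runnable sketch of that template-method shape (illustrative bodies, with int[] standing in for VectorizedRowBatch):

abstract class NativeVectorMapJoinSketch {
  private boolean needCommonSetup = true;
  private boolean needFirstBatchSetup = true;
  private boolean needHashTableSetup = true;
  protected long batchCounter;
  protected long rowCounter;

  protected void commonSetup() { /* allocate per-operator scratch state */ }
  protected void firstBatchSetup(int[] batch) { /* size state off the first batch */ }
  protected void hashTableSetup() { /* grab the (re)loaded hash table */ }
  protected abstract void processBatch(int[] batch);

  // Called after a Hybrid Grace reload to force hashTableSetup() to run again.
  protected void invalidateHashTable() { needHashTableSetup = true; }

  public final void process(int[] batch) {
    if (needCommonSetup) { commonSetup(); needCommonSetup = false; }
    if (needFirstBatchSetup) { firstBatchSetup(batch); needFirstBatchSetup = false; }
    if (needHashTableSetup) { hashTableSetup(); needHashTableSetup = false; }
    batchCounter++;
    if (batch.length == 0) {
      return;                    // empty batches are counted but not processed
    }
    rowCounter += batch.length;
    processBatch(batch);
  }
}

class CountingJoinSketch extends NativeVectorMapJoinSketch {
  @Override protected void processBatch(int[] batch) {
    System.out.println("batch #" + batchCounter + " rows " + batch.length);
  }
  public static void main(String[] args) {
    CountingJoinSketch op = new CountingJoinSketch();
    op.process(new int[] {1, 2, 3});
    op.process(new int[0]);      // counted, then skipped
  }
}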
@@ -153,11 +144,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java index 866aa60..f587517 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; @@ -109,45 +108,40 @@ public VectorMapJoinInnerBigOnlyMultiKeyOperator(CompilationOpContext ctx, Opera // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; + protected void commonSetup() throws HiveException { + super.commonSetup(); - alias = (byte) tag; + /* + * Initialize Multi-Key members for this specialized class. + */ - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Multi-Key members for this specialized class. - */ + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + } - needCommonSetup = false; - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. - /* - * Get our Multi-Key hash multi-set information for this specialized class. - */ + /* + * Get our Multi-Key hash multi-set information for this specialized class. + */ - - hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join.
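The Multi-Key variants cannot hash several columns directly, so keyVectorSerializeWrite renders each row's key columns into one canonical byte string (BinarySortableSerializeWrite in the patch) before probing the bytes hash table. A toy equivalent with length-prefixed fields, hypothetical and much simpler than the real BinarySortable encoding:

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class MultiKeyBytesSketch {
  static byte[] serializeKey(Object... fields) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (Object field : fields) {
      byte[] bytes = String.valueOf(field).getBytes(StandardCharsets.UTF_8);
      byte[] len = ByteBuffer.allocate(4).putInt(bytes.length).array();
      out.write(len, 0, len.length);        // length prefix keeps fields unambiguous
      out.write(bytes, 0, bytes.length);
    }
    return out.toByteArray();
  }

  public static void main(String[] args) {
    byte[] a = serializeKey(42L, "x");
    byte[] b = serializeKey(42L, "x");
    byte[] c = serializeKey(42L, "y");
    System.out.println(Arrays.equals(a, b)); // true: equal multi-keys, equal bytes
    System.out.println(Arrays.equals(a, c)); // false
  }
}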
@@ -160,11 +154,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java index a0c3b9c..e373db1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; @@ -98,40 +97,31 @@ public VectorMapJoinInnerBigOnlyStringOperator(CompilationOpContext ctx, Operato // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash multi-set information for this specialized class. - */ + /* + * Get our Single-Column String hash multi-set information for this specialized class. + */ - hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
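Note that all three Inner Big-Only operators above bind a hash multi-set rather than a hash map: since no small table values are projected, the join only needs the duplicate count per key, and each matching big table row is emitted that many times (the equalKeySeriesValueCounts array carries those counts). A toy rendering of the multi-set idea in plain java.util collections, not Hive's VectorMapJoinHashMultiSet:

import java.util.HashMap;
import java.util.Map;

final class InnerBigOnlyMultiSetSketch {

  public static void main(String[] args) {
    // Multi-set: small table key -> duplicate count (no values are stored).
    Map<Long, Integer> multiSet = new HashMap<>();
    for (long key : new long[] {10L, 20L, 20L, 20L}) {
      multiSet.merge(key, 1, Integer::sum);
    }

    // Each matching big table row is emitted once per small table duplicate.
    for (long bigKey : new long[] {10L, 20L, 30L}) {
      int count = multiSet.getOrDefault(bigKey, 0);
      for (int i = 0; i < count; i++) {
        System.out.println("emit big table row with key " + bigKey);
      }
    }
  }
}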
@@ -144,11 +134,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java index ea2c04d..dc5d046 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java @@ -108,26 +108,26 @@ public VectorMapJoinInnerGenerateResultOperator(CompilationOpContext ctx, Operat /* * Setup our inner join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Inner join specific. VectorMapJoinHashMap baseHashMap = (VectorMapJoinHashMap) vectorMapJoinHashTable; - hashMapResults = new VectorMapJoinHashMapResult[batch.DEFAULT_SIZE]; + hashMapResults = new VectorMapJoinHashMapResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMapResults.length; i++) { hashMapResults[i] = baseHashMap.createHashMapResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesIsSingleValue = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } /* @@ -142,7 +142,7 @@ protected void innerPerBatchSetup(VectorizedRowBatch batch) { // For join operators that can generate small table results, reset their // (target) scratch columns. - for (int column : smallTableOutputVectorColumns) { + for (int column : smallTableValueColumnMap) { ColumnVector smallTableColumn = batch.cols[column]; smallTableColumn.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index 36404bc..5ac606a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. 
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; @@ -101,45 +100,36 @@ public VectorMapJoinInnerLongOperator(CompilationOpContext ctx, OperatorDesc con // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + protected void commonSetup() throws HiveException { + super.commonSetup(); - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Initialize Single-Column Long members for this specialized class. + */ - /* - * Get our Single-Column Long hash map information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; - useMinMax = hashMap.useMinMax(); - if (useMinMax) { - min = hashMap.min(); - max = hashMap.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash map information for this specialized class. + */ + + hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + useMinMax = hashMap.useMinMax(); + if (useMinMax) { + min = hashMap.min(); + max = hashMap.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -151,11 +141,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java index 620101f..cdee3fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -107,45 +106,36 @@ public VectorMapJoinInnerMultiKeyOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. 
- */ - - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + /* + * Initialize Multi-Key members for this specialized class. + */ - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - needCommonSetup = false; - } + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Multi-Key hash map information for this specialized class. - */ + /* + * Get our Multi-Key hash map information for this specialized class. + */ - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -157,11 +147,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java index d99d514..8e6697e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -97,40 +96,31 @@ public VectorMapJoinInnerStringOperator(CompilationOpContext ctx, OperatorDesc c // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash map information for this specialized class. 
- */ + /* + * Get our Single-Column String hash map information for this specialized class. + */ - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -142,11 +132,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java index f68d4c4..71ec56b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java @@ -89,21 +89,21 @@ public VectorMapJoinLeftSemiGenerateResultOperator(CompilationOpContext ctx, Ope /* * Setup our left semi join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Semi join specific. VectorMapJoinHashSet baseHashSet = (VectorMapJoinHashSet) vectorMapJoinHashTable; - hashSetResults = new VectorMapJoinHashSetResult[batch.DEFAULT_SIZE]; + hashSetResults = new VectorMapJoinHashSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashSetResults.length; i++) { hashSetResults[i] = baseHashSet.createHashSetResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } //----------------------------------------------------------------------------------------------- diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java index 4185c5b..40e7cfa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashSet; @@ -102,45 +101,36 @@ public VectorMapJoinLeftSemiLongOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. 
- commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + protected void commonSetup() throws HiveException { + super.commonSetup(); - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Initialize Single-Column Long members for this specialized class. + */ - /* - * Get our Single-Column Long hash set information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashSet = (VectorMapJoinLongHashSet) vectorMapJoinHashTable; - useMinMax = hashSet.useMinMax(); - if (useMinMax) { - min = hashSet.min(); - max = hashSet.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash set information for this specialized class. + */ + + hashSet = (VectorMapJoinLongHashSet) vectorMapJoinHashTable; + useMinMax = hashSet.useMinMax(); + if (useMinMax) { + min = hashSet.min(); + max = hashSet.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -153,11 +143,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java index 541e7fa..e5d9fda 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -108,45 +107,36 @@ public VectorMapJoinLeftSemiMultiKeyOperator(CompilationOpContext ctx, OperatorD // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. - */ - - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + /* + * Initialize Multi-Key members for this specialized class. 
+ */ - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - needCommonSetup = false; - } + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Multi-Key hash set information for this specialized class. - */ + /* + * Get our Multi-Key hash set information for this specialized class. + */ - hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -159,11 +149,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 6785bce..df900a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -98,40 +97,31 @@ public VectorMapJoinLeftSemiStringOperator(CompilationOpContext ctx, OperatorDes // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash set information for this specialized class. - */ + /* + * Get our Single-Column String hash set information for this specialized class. 
+ */ - hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -144,11 +134,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index 2e5c568..077a68e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -24,16 +24,23 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; /** @@ -131,32 +138,34 @@ public VectorMapJoinOuterGenerateResultOperator(CompilationOpContext ctx, Operat /* * Setup our outer join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Outer join specific. 
VectorMapJoinHashMap baseHashMap = (VectorMapJoinHashMap) vectorMapJoinHashTable; - hashMapResults = new VectorMapJoinHashMapResult[batch.DEFAULT_SIZE]; + hashMapResults = new VectorMapJoinHashMapResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMapResults.length; i++) { hashMapResults[i] = baseHashMap.createHashMapResult(); } - inputSelected = new int[batch.DEFAULT_SIZE]; + inputSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesIsSingleValue = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; - nonSpills = new int[batch.DEFAULT_SIZE]; - noMatchs = new int[batch.DEFAULT_SIZE]; - merged = new int[batch.DEFAULT_SIZE]; + nonSpills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + noMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; + merged = new int[VectorizedRowBatch.DEFAULT_SIZE]; + + matchTracker = null; } @@ -174,15 +183,16 @@ protected void outerPerBatchSetup(VectorizedRowBatch batch) { // For join operators that can generate small table results, reset their // (target) scratch columns. - for (int column : smallTableOutputVectorColumns) { + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector bigTableOuterKeyColumn = batch.cols[column]; + bigTableOuterKeyColumn.reset(); + } + + for (int column : smallTableValueColumnMap) { ColumnVector smallTableColumn = batch.cols[column]; smallTableColumn.reset(); } - for (int column : bigTableOuterKeyOutputVectorColumns) { - ColumnVector bigTableOuterKeyColumn = batch.cols[column]; - bigTableOuterKeyColumn.reset(); - } } /** @@ -569,27 +579,28 @@ public void finishOuter(VectorizedRowBatch batch, protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs, int noMatchSize) throws IOException, HiveException { - // Set null information in the small table results area. + // Set null information in the small table results area. - for (int i = 0; i < noMatchSize; i++) { - int batchIndex = noMatchs[i]; + for (int i = 0; i < noMatchSize; i++) { + int batchIndex = noMatchs[i]; - // Mark any scratch small table scratch columns that would normally receive a copy of the - // key as null, too. - for (int column : bigTableOuterKeyOutputVectorColumns) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; - } + // Mark any scratch small table scratch columns that would normally receive a copy of the + // key as null, too. + // + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector colVector = batch.cols[column]; + colVector.noNulls = false; + colVector.isNull[batchIndex] = true; + } - // Small table values are set to null. 
- for (int column : smallTableOutputVectorColumns) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; - } - } - } + // Small table values are set to null. + for (int column : smallTableValueColumnMap) { + ColumnVector colVector = batch.cols[column]; + colVector.noNulls = false; + colVector.isNull[batchIndex] = true; + } + } + } /** * Generate the outer join output results for one vectorized row batch with a repeated key. @@ -734,20 +745,310 @@ public void finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult jo */ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException { - for (int column : smallTableOutputVectorColumns) { + // Mark any scratch small table scratch columns that would normally receive a copy of the + // key as null, too. + // + for (int column : outerSmallTableKeyColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; colVector.isRepeating = true; } - // Mark any scratch small table scratch columns that would normally receive a copy of the key - // as null, too. - for (int column : bigTableOuterKeyOutputVectorColumns) { + for (int column : smallTableValueColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; colVector.isRepeating = true; } } + + private void markBigTableColumnsAsNullRepeating() { + + /* + * For non-match FULL OUTER Small Table results, the Big Table columns are all NULL. + */ + for (int column : bigTableRetainColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.isRepeating = true; + colVector.noNulls = false; + colVector.isNull[0] = true; + } + } + + /* + * For FULL OUTER MapJoin, find the non matched Small Table keys and values and add them to the + join output result. + */ + @Override + protected void generateFullOuterSmallTableNoMatches(byte smallTablePos, + MapJoinTableContainer substituteSmallTable) throws HiveException { + + /* + * For dynamic partition hash join, both the Big Table and Small Table are partitioned (sent) + * to the Reducer using the key hash code. So, we can generate the non-match Small Table + * results locally. + * + * Scan the Small Table for keys that didn't match and generate the non-matches into the + * overflowBatch. + */ + + /* + * If there were no matched keys sent, we need to do our common initialization. + */ + if (needCommonSetup) { + + // Our one time process method initialization. + commonSetup(); + + needCommonSetup = false; + } + + if (needHashTableSetup) { + + // Setup our hash table specialization. It will be the first time the process + // method is called, or after a Hybrid Grace reload. + + hashTableSetup(); + + needHashTableSetup = false; + } + + /* + * To support fancy NULL repeating columns, let's flush the overflowBatch if it has anything.
+ */ + if (overflowBatch.size > 0) { + forwardOverflow(); + } + markBigTableColumnsAsNullRepeating(); + + switch (hashTableKeyType) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + generateFullOuterLongKeySmallTableNoMatches(); + break; + case STRING: + generateFullOuterStringKeySmallTableNoMatches(); + break; + case MULTI_KEY: + generateFullOuterMultiKeySmallTableNoMatches(); + break; + default: + throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType); + } + } + + /* + * For FULL OUTER MapJoin, find the non matched Small Table Long keys and values and add them to + * the join output result. + */ + protected void generateFullOuterLongKeySmallTableNoMatches() + throws HiveException { + + final LongColumnVector singleSmallTableKeyOutputColumnVector; + if (allSmallTableKeyColumnIncluded[0]) { + singleSmallTableKeyOutputColumnVector = + (LongColumnVector) overflowBatch.cols[allSmallTableKeyColumnNums[0]]; + } else { + singleSmallTableKeyOutputColumnVector = null; + } + + VectorMapJoinLongHashMap hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + final long longKey; + boolean isKeyNull = !nonMatchedIterator.readNonMatchedLongKey(); + if (!isKeyNull) { + longKey = nonMatchedIterator.getNonMatchedLongKey(); + } else { + longKey = 0; + } + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. + + if (singleSmallTableKeyOutputColumnVector != null) { + if (isKeyNull) { + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = true; + singleSmallTableKeyOutputColumnVector.noNulls = false; + } else { + singleSmallTableKeyOutputColumnVector.vector[overflowBatch.size] = longKey; + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = false; + } + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + } + + private void doSmallTableKeyDeserializeRow(VectorizedRowBatch batch, int batchIndex, + byte[] keyBytes, int keyOffset, int keyLength) + throws HiveException { + + smallTableKeyOuterVectorDeserializeRow.setBytes(keyBytes, keyOffset, keyLength); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + smallTableKeyOuterVectorDeserializeRow.deserializeByRef(batch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + smallTableKeyOuterVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + } + + /* + * For FULL OUTER MapJoin, find the non matched Small Table Multi-Keys and values and add them to + * the join output result.
+ */ + protected void generateFullOuterMultiKeySmallTableNoMatches() throws HiveException { + + VectorMapJoinBytesHashMap hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + nonMatchedIterator.readNonMatchedBytesKey(); + byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes(); + final int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + final int keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. + + if (smallTableKeyOuterVectorDeserializeRow != null) { + doSmallTableKeyDeserializeRow(overflowBatch, overflowBatch.size, + keyBytes, keyOffset, keyLength); + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + + // NOTE: We don't have to deal with FULL OUTER All-NULL key values like we do for single-column + // LONG and STRING because we do store them in the hash map... + } + + /* + * For FULL OUTER MapJoin, find the non matched Small Table String keys and values and add them to + * the join output result. + */ + protected void generateFullOuterStringKeySmallTableNoMatches() throws HiveException { + + final BytesColumnVector singleSmallTableKeyOutputColumnVector; + if (allSmallTableKeyColumnIncluded[0]) { + singleSmallTableKeyOutputColumnVector = + (BytesColumnVector) overflowBatch.cols[allSmallTableKeyColumnNums[0]]; + } else { + singleSmallTableKeyOutputColumnVector = null; + } + + VectorMapJoinBytesHashMap hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + final byte[] keyBytes; + final int keyOffset; + final int keyLength; + boolean isKeyNull = !nonMatchedIterator.readNonMatchedBytesKey(); + if (!isKeyNull) { + keyBytes = nonMatchedIterator.getNonMatchedBytes(); + keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + } else { + keyBytes = null; + keyOffset = 0; + keyLength = 0; + } + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already.
+ + if (singleSmallTableKeyOutputColumnVector != null) { + if (isKeyNull) { + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = true; + singleSmallTableKeyOutputColumnVector.noNulls = false; + } else { + singleSmallTableKeyOutputColumnVector.setVal( + overflowBatch.size, + keyBytes, keyOffset, keyLength); + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = false; + } + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + } + + protected void fullOuterHashTableSetup() { + + // Always track key matches for FULL OUTER. + matchTracker = vectorMapJoinHashTable.createMatchTracker(); + + } + + protected void fullOuterIntersectHashTableSetup() { + + matchTracker = vectorMapJoinHashTable.createMatchTracker(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index be05cc2..f3f5a36 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; @@ -65,7 +64,7 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. - private transient VectorMapJoinLongHashMap hashMap; + protected transient VectorMapJoinLongHashMap hashMap; //--------------------------------------------------------------------------- // Single-Column Long specific members. @@ -77,7 +76,7 @@ protected String getLoggingPrefix() { private transient long max; // The column number for this one column join specialization. - private transient int singleJoinColumn; + protected transient int singleJoinColumn; //--------------------------------------------------------------------------- // Pass-thru constructors. @@ -102,55 +101,39 @@ public VectorMapJoinOuterLongOperator(CompilationOpContext ctx, OperatorDesc con // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; - - needCommonSetup = false; - } + /* + * Initialize Single-Column Long members for this specialized class. + */ - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. - - /* - * Get our Single-Column Long hash map information for this specialized class. 
- */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; - useMinMax = hashMap.useMinMax(); - if (useMinMax) { - min = hashMap.min(); - max = hashMap.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash map information for this specialized class. + */ + + hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + useMinMax = hashMap.useMinMax(); + if (useMinMax) { + min = hashMap.min(); + max = hashMap.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -160,9 +143,6 @@ public void process(Object row, int tag) throws HiveException { // later. boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } @@ -174,19 +154,6 @@ public void process(Object row, int tag) throws HiveException { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -234,12 +201,11 @@ public void process(Object row, int tag) throws HiveException { } else { // Handle *repeated* join key, if found. long key = vector[0]; - // LOG.debug(CLASS_NAME + " repeated key " + key); if (useMinMax && (key < min || key > max)) { // Out of range for whole batch. joinResult = JoinUtil.JoinResult.NOMATCH; } else { - joinResult = hashMap.lookup(key, hashMapResults[0]); + joinResult = hashMap.lookup(key, hashMapResults[0], matchTracker); } } @@ -247,9 +213,6 @@ public void process(Object row, int tag) throws HiveException { * Common repeated join result processing. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); - } finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize); } else { @@ -258,10 +221,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -286,8 +245,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? 
selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Single-Column Long outer null detection. */ @@ -305,7 +262,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -354,11 +310,10 @@ public void process(Object row, int tag) throws HiveException { // Key out of range for whole hash table. saveJoinResult = JoinUtil.JoinResult.NOMATCH; } else { - saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount], + matchTracker); } - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + currentKey + " " + saveJoinResult.name()); - /* * Common outer join result processing. */ @@ -370,7 +325,6 @@ public void process(Object row, int tag) throws HiveException { equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -381,11 +335,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -393,7 +345,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -403,13 +354,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -451,7 +398,9 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. 
forwardBigTableBatch(batch); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java index 70f88e3..29c531b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -69,17 +68,17 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. - private transient VectorMapJoinBytesHashMap hashMap; + protected transient VectorMapJoinBytesHashMap hashMap; //--------------------------------------------------------------------------- // Multi-Key specific members. // // Object that can take a set of columns in row in a vectorized row batch and serialized it. - private transient VectorSerializeRow keyVectorSerializeWrite; + protected transient VectorSerializeRow keyVectorSerializeWrite; // The BinarySortable serialization of the current key. - private transient Output currentKeyOutput; + protected transient Output currentKeyOutput; // The BinarySortable serialization of the saved key for a possible series of equal keys. private transient Output saveKeyOutput; @@ -107,55 +106,40 @@ public VectorMapJoinOuterMultiKeyOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; + protected void commonSetup() throws HiveException { + super.commonSetup(); - alias = (byte) tag; + /* + * Initialize Multi-Key members for this specialized class. + */ - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - /* - * Initialize Multi-Key members for this specialized class. - */ - - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - needCommonSetup = false; - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Get our Multi-Key hash map information for this specialized class. + */ - /* - * Get our Multi-Key hash map information for this specialized class. 
- */ + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -165,9 +149,6 @@ public void process(Object row, int tag) throws HiveException { // later. boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } @@ -179,19 +160,6 @@ public void process(Object row, int tag) throws HiveException { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -259,16 +227,13 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite.serializeWrite(batch, 0); byte[] keyBytes = currentKeyOutput.getData(); int keyLength = currentKeyOutput.getLength(); - joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]); + joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0], matchTracker); } /* * Common repeated join result processing. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); - } finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize); } else { @@ -277,10 +242,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -305,8 +266,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Multi-Key outer null detection. 
*/ @@ -325,7 +284,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -375,7 +333,9 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = saveKeyOutput.getData(); int keyLength = saveKeyOutput.getLength(); - saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, + hashMapResults[hashMapResultCount], matchTracker); + /* * Common outer join result processing. @@ -388,7 +348,6 @@ public void process(Object row, int tag) throws HiveException { equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -399,11 +358,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -411,7 +368,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -421,13 +377,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -469,7 +421,9 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java index 714f5ec..a19941a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. 
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -65,14 +64,14 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. - private transient VectorMapJoinBytesHashMap hashMap; + protected transient VectorMapJoinBytesHashMap hashMap; //--------------------------------------------------------------------------- // Single-Column String specific members. // // The column number for this one column join specialization. - private transient int singleJoinColumn; + protected transient int singleJoinColumn; //--------------------------------------------------------------------------- // Pass-thru constructors. @@ -97,50 +96,35 @@ public VectorMapJoinOuterStringOperator(CompilationOpContext ctx, OperatorDesc c // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; + protected void commonSetup() throws HiveException { + super.commonSetup(); - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + /* + * Initialize Single-Column String members for this specialized class. + */ - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + singleJoinColumn = bigTableKeyColumnMap[0]; + } - needCommonSetup = false; - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Get our Single-Column String hash map information for this specialized class. + */ - /* - * Get our Single-Column String hash map information for this specialized class. - */ + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -150,33 +134,17 @@ public void process(Object row, int tag) throws HiveException { // later. boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } // Filtering for outer join just removes rows available for hash table matching. 
- boolean someRowsFilteredOut = false; + boolean someRowsFilteredOut = false; if (bigTableFilterExpressions.length > 0) { // Since the input for (VectorExpression ve : bigTableFilterExpressions) { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -228,7 +196,8 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = vector[0]; int keyStart = start[0]; int keyLength = length[0]; - joinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[0]); + joinResult = hashMap.lookup( + keyBytes, keyStart, keyLength, hashMapResults[0], matchTracker); } /* @@ -246,10 +215,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -274,8 +239,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Single-Column String outer null detection. */ @@ -293,7 +256,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -343,7 +305,8 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = vector[batchIndex]; int keyStart = start[batchIndex]; int keyLength = length[batchIndex]; - saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, + hashMapResults[hashMapResultCount], matchTracker); /* * Common outer join result processing. @@ -356,7 +319,6 @@ public void process(Object row, int tag) throws HiveException { equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -367,11 +329,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. 
@@ -379,7 +339,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -389,13 +348,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -437,7 +392,9 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index 57db136..ef130ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -23,9 +23,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; @@ -46,12 +50,115 @@ protected BytesWritable testValueBytesWritable; + private long fullOuterNullKeyValueRef; + + private static class NonMatchedBytesHashMapIterator extends VectorMapJoinFastNonMatchedIterator { + + private VectorMapJoinFastBytesHashMap hashMap; + + private boolean noMore; + private boolean keyIsNull; + + private WriteBuffers.Position nonMatchedReadPos; + + private ByteSegmentRef nonMatchedKeyByteSegmentRef; + + private VectorMapJoinFastValueStore.HashMapResult nonMatchedHashMapResult; + + NonMatchedBytesHashMapIterator(MatchTracker matchTracker, + VectorMapJoinFastBytesHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + noMore = false; + keyIsNull = false; + nonMatchedReadPos = new WriteBuffers.Position(); + nonMatchedKeyByteSegmentRef = new ByteSegmentRef(); + nonMatchedHashMapResult = new VectorMapJoinFastValueStore.HashMapResult(); + } + + @Override + public boolean findNextNonMatched() { + if (noMore) { + return false; + } + while (true) { + nonMatchedLogicalSlotNum++; + if (nonMatchedLogicalSlotNum >= hashMap.logicalHashBucketCount) { + + // Fall below and handle Small Table NULL key. 
+ break; + } + final int nonMatchedTripleIndex = nonMatchedLogicalSlotNum * 3; + if (hashMap.slotTriples[nonMatchedTripleIndex] != 0) { + if (!matchTracker.wasMatched(nonMatchedLogicalSlotNum)) { + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.slotTriples[nonMatchedTripleIndex + 2]); + keyIsNull = false; + return true; + } + } + } + + // Do we have a Small Table NULL Key? + if (hashMap.fullOuterNullKeyValueRef == 0) { + return false; + } + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.fullOuterNullKeyValueRef); + noMore = true; + keyIsNull = true; + return true; + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + if (keyIsNull) { + return false; + } + hashMap.keyStore.getKey( + hashMap.slotTriples[nonMatchedLogicalSlotNum * 3], + nonMatchedKeyByteSegmentRef, + nonMatchedReadPos); + return true; + } + + @Override + public byte[] getNonMatchedBytes() { + return nonMatchedKeyByteSegmentRef.getBytes(); + } + + @Override + public int getNonMatchedBytesOffset() { + return (int) nonMatchedKeyByteSegmentRef.getOffset(); + } + + @Override + public int getNonMatchedBytesLength() { + return nonMatchedKeyByteSegmentRef.getLength(); + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + return nonMatchedHashMapResult; + } + } + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedBytesHashMapIterator(matchTracker, this); + } + + @Override public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, long hashCode, boolean isNewKey, BytesWritable currentValue) { @@ -64,31 +171,56 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength); - // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. 
- // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2] = valueStore.addMore(slotTriples[tripleIndex + 2], valueBytes, 0, valueLength); - // LOG.debug("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } } @Override - public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, VectorMapJoinHashMapResult hashMapResult) { + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult) { VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; optimizedHashMapResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long valueRefWord = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (valueRefWord == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null)); + optimizedHashMapResult.set(valueStore, slotTriples[tripleIndex + 2]); - optimizedHashMapResult.set(valueStore, valueRefWord); + joinResult = JoinUtil.JoinResult.MATCH; + } + + optimizedHashMapResult.setJoinResult(joinResult); + + return joinResult; + } + + @Override + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) { + VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = + (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; + + optimizedHashMapResult.forget(); + + long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); + JoinUtil.JoinResult joinResult; + if (tripleIndex == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + optimizedHashMapResult.set(valueStore, slotTriples[tripleIndex + 2]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -98,10 +230,27 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, return joinResult; } + public void addFullOuterNullKeyValue(BytesWritable currentValue) { + + byte[] valueBytes = currentValue.getBytes(); + int valueLength = currentValue.getLength(); + + if (fullOuterNullKeyValueRef == 0) { + fullOuterNullKeyValueRef = valueStore.addFirst(valueBytes, 0, valueLength); + } else { + + // Add another value. + fullOuterNullKeyValueRef = + valueStore.addMore(fullOuterNullKeyValueRef, valueBytes, 0, valueLength); + } + } + public VectorMapJoinFastBytesHashMap( int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + fullOuterNullKeyValueRef = 0; + valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); // Share the same write buffers with our value store. 
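[Reviewer note, not part of the patch: the MatchTracker and non-matched iterator pieces above are easiest to judge end to end. The sketch below shows the intended FULL OUTER sequence over the new bytes hash map API, assuming an operator-side driver that is not in this hunk; every name used is taken from the interfaces and methods added in this diff, and exception handling is elided.]

    MatchTracker matchTracker = hashMap.createMatchTracker();

    // Probe phase: each Big Table lookup records the matched logical slot.
    JoinUtil.JoinResult joinResult =
        hashMap.lookup(keyBytes, 0, keyLength, hashMapResult, matchTracker);

    // Close phase: surface Small Table entries that were never matched, then the
    // optional NULL-key value chain (for which readNonMatchedBytesKey() is false).
    VectorMapJoinNonMatchedIterator nonMatchedIterator =
        hashMap.createNonMatchedIterator(matchTracker);
    nonMatchedIterator.init();
    while (nonMatchedIterator.findNextNonMatched()) {
      if (nonMatchedIterator.readNonMatchedBytesKey()) {
        byte[] nonMatchedKey = nonMatchedIterator.getNonMatchedBytes();
        int nonMatchedKeyOffset = nonMatchedIterator.getNonMatchedBytesOffset();
        int nonMatchedKeyLength = nonMatchedIterator.getNonMatchedBytesLength();
        // ... deserialize the Small Table key into the output row ...
      }
      VectorMapJoinHashMapResult nonMatchedValues =
          nonMatchedIterator.getNonMatchedHashMapResult();
      // ... emit NULL-extended result rows for every value in the chain ...
    }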
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 726fd29..c0295dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -57,10 +57,8 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Count. - // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. - // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2]++; } } @@ -75,13 +73,20 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, optimizedHashMultiSetResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long count = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMultiSetResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMultiSetResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (count == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - - optimizedHashMultiSetResult.set(count); + /* + * NOTE: Support for trackMatched not needed yet for Set. + + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + */ + optimizedHashMultiSetResult.set(slotTriples[tripleIndex + 2]); joinResult = JoinUtil.JoinResult.MATCH; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 5d750a8..e99a029 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -65,11 +65,19 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long existance = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashSetResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashSetResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (existance == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index f2b794f..dcb89b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -70,13 +70,11 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr while (true) { int tripleIndex = 3 * slot; if (slotTriples[tripleIndex] == 0) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty"); isNewKey = true;; break; } if (hashCode == slotTriples[tripleIndex + 1] && keyStore.unsafeEqualKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing"); isNewKey = false; break; } @@ -150,7 +148,6 @@ private void expandAndRehash() { } // Use old value reference word. - // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")"); newSlotTriples[newTripleIndex] = keyRef; newSlotTriples[newTripleIndex + 1] = hashCode; @@ -165,10 +162,9 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected final long findReadSlot( + protected final int findReadSlot( byte[] keyBytes, int keyStart, int keyLength, long hashCode, WriteBuffers.Position readPos) { int intHashCode = (int) hashCode; @@ -177,7 +173,6 @@ protected final long findReadSlot( int i = 0; while (true) { int tripleIndex = slot * 3; - // LOG.debug("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); if (slotTriples[tripleIndex] == 0) { // Given that we do not delete, an empty slot means no match. return -1; @@ -185,7 +180,7 @@ protected final long findReadSlot( // Finally, verify the key bytes match. if (keyStore.equalKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength, readPos)) { - return slotTriples[tripleIndex + 2]; + return tripleIndex; } } // Some other key (collision) - keep probing. 
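[Reviewer note, not part of the patch: the behavioral point of this file's change is that findReadSlot now returns the triple index instead of the value reference word. The old return value carried no slot information; the index does, which is what makes match tracking possible. Layout as assumed from the surrounding code:]

    // slotTriples packs three longs per logical slot:
    //   slotTriples[3 * slot]      key reference word (0 means the slot is empty)
    //   slotTriples[3 * slot + 1]  full hash code
    //   slotTriples[3 * slot + 2]  value reference word (or the count, for multi-sets)
    //
    // A successful lookup can therefore record its logical slot in the tracker,
    // which VectorMapJoinFastHashTable.createMatchTracker (next file below) sizes
    // to logicalHashBucketCount:
    int logicalSlot = tripleIndex / 3;
    matchTracker.trackMatch(logicalSlot);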
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java index cbcc9b1..bcc9cb9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java @@ -22,7 +22,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTable { public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastHashTable.class); @@ -96,4 +98,19 @@ public long getEstimatedMemorySize() { JavaDataModel jdm = JavaDataModel.get(); return JavaDataModel.alignUp(10L * jdm.primitive1() + jdm.primitive2(), jdm.memoryAlign()); } + + @Override + public MatchTracker createMatchTracker() { + return MatchTracker.create(logicalHashBucketCount); + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override + public int spillPartitionId() { + throw new RuntimeException("Not implemented"); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java index b6684e0..0a3c84a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java @@ -22,6 +22,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; // Optimized for sequential key lookup. @@ -124,13 +125,11 @@ public boolean unsafeEqualKey(long keyRefWord, byte[] keyBytes, int keyStart, in public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyLength, WriteBuffers.Position readPos) { - int storedKeyLengthLength = + int storedKeyLength = (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); - boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); - // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord)); - - if (isKeyLengthSmall && storedKeyLengthLength != keyLength) { + if (isKeyLengthSmall && storedKeyLength != keyLength) { return false; } long absoluteKeyOffset = @@ -139,16 +138,14 @@ public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyL writeBuffers.setReadPoint(absoluteKeyOffset, readPos); if (!isKeyLengthSmall) { // Read big value length we wrote with the value. 
- storedKeyLengthLength = writeBuffers.readVInt(readPos); - if (storedKeyLengthLength != keyLength) { - // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length"); + storedKeyLength = writeBuffers.readVInt(readPos); + if (storedKeyLength != keyLength) { return false; } } // Our reading is positioned to the key. if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { - // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes"); return false; } @@ -174,4 +171,23 @@ public long getEstimatedMemorySize() { size += unsafeReadPos == null ? 0 : unsafeReadPos.getEstimatedMemorySize(); return size; } + + public void getKey(long keyRefWord, ByteSegmentRef keyByteSegmentRef, + WriteBuffers.Position readPos) { + + int storedKeyLength = + (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); + + long absoluteKeyOffset = + (keyRefWord & AbsoluteKeyOffset.bitMask); + + writeBuffers.setReadPoint(absoluteKeyOffset, readPos); + if (!isKeyLengthSmall) { + // Read the big key length we wrote with the key. + storedKeyLength = writeBuffers.readVInt(readPos); + } + writeBuffers.getByteSegmentRefToCurrent(keyByteSegmentRef, storedKeyLength, readPos); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java index f42430d..0dc2f47 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java @@ -22,13 +22,17 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; @@ -41,17 +45,117 @@ extends VectorMapJoinFastLongHashTable implements VectorMapJoinLongHashMap, MemoryEstimate { - public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMap.class); + // public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMap.class); + + private final boolean isSaveNullKeyValuesForFullOuter; protected VectorMapJoinFastValueStore valueStore; private BytesWritable testValueBytesWritable; + private long fullOuterNullKeyValueRef; + + private static class NonMatchedLongHashMapIterator extends VectorMapJoinFastNonMatchedIterator { + + private VectorMapJoinFastLongHashMap hashMap; + + private boolean noMore; + private boolean keyIsNull; + + private WriteBuffers.Position nonMatchedReadPos; + + private ByteSegmentRef nonMatchedKeyByteSegmentRef; + + private
VectorMapJoinFastValueStore.HashMapResult nonMatchedHashMapResult; + + NonMatchedLongHashMapIterator(MatchTracker matchTracker, + VectorMapJoinFastLongHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + noMore = false; + keyIsNull = false; + nonMatchedHashMapResult = new VectorMapJoinFastValueStore.HashMapResult(); + } + + @Override + public boolean findNextNonMatched() { + if (noMore) { + return false; + } + while (true) { + nonMatchedLogicalSlotNum++; + if (nonMatchedLogicalSlotNum >= hashMap.logicalHashBucketCount){ + + // Fall below and handle Small Table NULL key. + break; + } + final int nonMatchedDoubleIndex = nonMatchedLogicalSlotNum * 2; + if (hashMap.slotPairs[nonMatchedDoubleIndex] != 0) { + if (!matchTracker.wasMatched(nonMatchedLogicalSlotNum)) { + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.slotPairs[nonMatchedDoubleIndex]); + keyIsNull = false; + return true; + } + } + } + + // Do we have a Small Table NULL Key? + if (hashMap.fullOuterNullKeyValueRef == 0) { + return false; + } + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.fullOuterNullKeyValueRef); + noMore = true; + keyIsNull = true; + return true; + } + + @Override + public boolean readNonMatchedLongKey() { + return !keyIsNull; + } + + @Override + public long getNonMatchedLongKey() { + return hashMap.slotPairs[nonMatchedLogicalSlotNum * 2 + 1]; + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + return nonMatchedHashMapResult; + } + } + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedLongHashMapIterator(matchTracker, this); + } + + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + if (!adaptPutRow(currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + addFullOuterNullKeyValue(currentValue); + } + + } + } + /* * A Unit Test convenience method for putting key and value into the hash table using the * actual types. 
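[Reviewer note, not part of the patch: the long-keyed map mirrors the bytes path with pairs instead of triples. slotPairs[2 * slot] holds the value reference word (0 means the slot is empty) and slotPairs[2 * slot + 1] holds the long key itself, which is why getNonMatchedLongKey() above can return the key directly with no key store read. The putRow/adaptPutRow split fixes the NULL-key contract; condensed, using only names from this diff:]

    // adaptPutRow deserializes the key and reports what happened:
    //   true  -> key was non-NULL; the (key, value) pair was added to the table
    //   false -> key was NULL; nothing was added
    if (!adaptPutRow(currentKey, currentValue)) {
      // Previously NULL keys were silently dropped. FULL OUTER must still emit
      // these Small Table rows NULL-extended, so their values are chained off
      // fullOuterNullKeyValueRef and replayed later by the non-matched iterator.
      if (isSaveNullKeyValuesForFullOuter) {
        addFullOuterNullKeyValue(currentValue);
      }
    }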
@@ -91,13 +195,12 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre optimizedHashMapResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - // LOG.debug("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode); - long valueRef = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (valueRef == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - optimizedHashMapResult.set(valueStore, valueRef); + optimizedHashMapResult.set(valueStore, slotPairs[pairIndex]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -107,12 +210,59 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre return joinResult; } + @Override + public JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult, + MatchTracker matchTracker) { + + VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = + (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; + + optimizedHashMapResult.forget(); + + long hashCode = HashCodeUtil.calculateLongHashCode(key); + int pairIndex = findReadSlot(key, hashCode); + JoinUtil.JoinResult joinResult; + if (pairIndex == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + optimizedHashMapResult.set(valueStore, slotPairs[pairIndex]); + + joinResult = JoinUtil.JoinResult.MATCH; + } + + optimizedHashMapResult.setJoinResult(joinResult); + + return joinResult; + } + + public void addFullOuterNullKeyValue(BytesWritable currentValue) { + + byte[] valueBytes = currentValue.getBytes(); + int valueLength = currentValue.getLength(); + + if (fullOuterNullKeyValueRef == 0) { + fullOuterNullKeyValueRef = valueStore.addFirst(valueBytes, 0, valueLength); + } else { + + // Add another value. 
+ fullOuterNullKeyValueRef = + valueStore.addMore(fullOuterNullKeyValueRef, valueBytes, 0, valueLength); + } + } + public VectorMapJoinFastLongHashMap( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + boolean isSaveNullKeyValuesForFullOuter, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); + fullOuterNullKeyValueRef = 0; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java index 228fa72..eda8a56 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java @@ -42,11 +42,29 @@ public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMultiSet.class); + private final boolean isSaveNullKeyValuesForFullOuter; + + private long fullOuterNullKeyValueCount; + @Override public VectorMapJoinHashMultiSetResult createHashMultiSetResult() { return new VectorMapJoinFastHashMultiSet.HashMultiSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + if (!adaptPutRow(currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + fullOuterNullKeyValueCount++; + } + + } + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -80,12 +98,19 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre optimizedHashMultiSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - long count = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (count == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - optimizedHashMultiSetResult.set(count); + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + */ + optimizedHashMultiSetResult.set(slotPairs[pairIndex]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -95,10 +120,14 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre } public VectorMapJoinFastLongHashMultiSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + boolean isSaveNullKeyValuesForFullOuter, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + fullOuterNullKeyValueCount = 0; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 4c049cb..14b1965 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -47,6 +47,14 @@ public VectorMapJoinHashSetResult createHashSetResult() { return new VectorMapJoinFastHashSet.HashSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + // Ignore NULL keys (HashSet not used for FULL OUTER). + adaptPutRow(currentKey, currentValue); + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -76,11 +84,18 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - long existance = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (existance == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } @@ -91,9 +106,10 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { } public VectorMapJoinFastLongHashSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index c9c3e80..8b775fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -49,8 +49,6 @@ private final HashTableKeyType hashTableKeyType; - private final boolean isOuterJoin; - private final BinarySortableDeserializeRead keyBinarySortableDeserializeRead; private final boolean useMinMax; @@ -72,14 +70,13 @@ public long max() { return max; } - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public boolean adaptPutRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { - return; + return false; } } catch (Exception e) { throw new HiveException( @@ -92,6 +89,7 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws keyBinarySortableDeserializeRead, hashTableKeyType); add(key, currentValue); + return true; } protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue); @@ -215,10 +213,9 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected long findReadSlot(long key, long hashCode) { + protected int findReadSlot(long key, long hashCode) { int intHashCode = (int) hashCode; int slot = intHashCode & logicalHashBucketMask; @@ -230,20 +227,16 @@ protected long findReadSlot(long key, long hashCode) { long valueRef = slotPairs[pairIndex]; if (valueRef == 0) { // Given that we do not delete, an empty slot means no match. - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")"); return -1; } long tableKey = slotPairs[pairIndex + 1]; if (key == tableKey) { - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")"); - return slotPairs[pairIndex]; + return pairIndex; } // Some other key (collision) - keep probing. 
probeSlot += (++i); if (i > largestNumberOfSteps) { - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot returning not found"); // We know we never went that far when we were inserting. - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")"); return -1; } slot = (int)(probeSlot & logicalHashBucketMask); @@ -268,10 +261,10 @@ private void allocateBucketArray() { } public VectorMapJoinFastLongHashTable( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - this.isOuterJoin = isOuterJoin; this.hashTableKeyType = hashTableKeyType; PrimitiveTypeInfo[] primitiveTypeInfos = { hashTableKeyType.getPrimitiveTypeInfo() }; keyBinarySortableDeserializeRead = diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java index 2798010..4a63772 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java @@ -49,8 +49,8 @@ public void testPutRow(byte[] currentKey, byte[] currentValue) throws HiveExcept } public VectorMapJoinFastMultiKeyHashMap( - boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { + boolean isSaveNullKeyValuesForFullOuter, + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java index 0560281..31aa95f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java @@ -47,8 +47,8 @@ public void testPutRow(byte[] currentKey) throws HiveException, IOException { } public VectorMapJoinFastMultiKeyHashMultiSet( - boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { + boolean isSaveNullKeyValuesForFullOuter, + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java index 900ca55..ed8b989 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java @@ -47,8 +47,7 @@ public void testPutRow(byte[] currentKey) throws HiveException, IOException { } public VectorMapJoinFastMultiKeyHashSet( - boolean isOuterJoin, - int initialCapacity, float loadFactor, 
int writeBuffersSize, long estimatedKeyCount) { + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java new file mode 100644 index 0000000..3d29cf4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; + +/** + * The abstract class for vectorized non-match Small Table key iteration. + */ +public abstract class VectorMapJoinFastNonMatchedIterator + extends VectorMapJoinNonMatchedIterator { + + protected int nonMatchedLogicalSlotNum; + + public VectorMapJoinFastNonMatchedIterator(MatchTracker matchTracker) { + super(matchTracker); + } + + @Override + public void init() { + nonMatchedLogicalSlotNum = -1; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java index 777eb45..1b108a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java @@ -35,11 +35,9 @@ public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastStringCommon.class); - private boolean isOuterJoin; - private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, + public boolean adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); @@ -47,7 +45,7 @@ public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { - return; + return false; } } catch (Exception e) { throw new HiveException( @@ -61,14 +59,14 @@ public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, keyBinarySortableDeserializeRead.currentBytesStart, keyBinarySortableDeserializeRead.currentBytesLength, currentValue); + return true; } - public 
VectorMapJoinFastStringCommon(boolean isOuterJoin) { - this.isOuterJoin = isOuterJoin; + public VectorMapJoinFastStringCommon() { PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo }; keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead( primitiveTypeInfos, /* useExternalBuffer */ false); } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java index fc4edda..56068f3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java @@ -30,18 +30,27 @@ */ public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMap { + private final boolean isSaveNullKeyValuesForFullOuter; + private VectorMapJoinFastStringCommon stringCommon; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { - stringCommon.adaptPutRow(this, currentKey, currentValue); + if (!stringCommon.adaptPutRow(this, currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + addFullOuterNullKeyValue(currentValue); + } + } } public VectorMapJoinFastStringHashMap( - boolean isOuterJoin, + boolean isSaveNullKeyValuesForFullOuter, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java index 3dbdfa7..911a61e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java @@ -30,18 +30,30 @@ */ public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesHashMultiSet { - private VectorMapJoinFastStringCommon stringCommon; + private final boolean isSaveNullKeyValuesForFullOuter; + + private final VectorMapJoinFastStringCommon stringCommon; + + private long fullOuterNullKeyValueCount; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { - stringCommon.adaptPutRow(this, currentKey, currentValue); + if (!stringCommon.adaptPutRow(this, currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. 
+ if (isSaveNullKeyValuesForFullOuter) { + fullOuterNullKeyValueCount++; + } + } } public VectorMapJoinFastStringHashMultiSet( - boolean isOuterJoin, + boolean isSaveNullKeyValuesForFullOuter, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + fullOuterNullKeyValueCount = 0; + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java index 84f8439..3dc7847 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java @@ -33,15 +33,17 @@ private VectorMapJoinFastStringCommon stringCommon; @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + // Ignore NULL keys (HashSet not used for FULL OUTER). stringCommon.adaptPutRow(this, currentKey, currentValue); } public VectorMapJoinFastStringHashSet( - boolean isOuterJoin, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 24dfa5d..3e41ec0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -27,6 +27,8 @@ import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTableContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -73,11 +75,6 @@ public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf, this.estimatedKeyCount = estimatedKeyCount; - // LOG.info("VectorMapJoinFastTableContainer load keyCountAdj " + keyCountAdj); - // LOG.info("VectorMapJoinFastTableContainer load threshold " + threshold); - // LOG.info("VectorMapJoinFastTableContainer load loadFactor " + loadFactor); - // LOG.info("VectorMapJoinFastTableContainer load wbSize " + wbSize); - int newThreshold = HashMapWrapper.calculateTableSize( keyCountAdj, threshold, loadFactor, estimatedKeyCount); @@ -93,13 +90,11 @@ public VectorMapJoinHashTable vectorMapJoinHashTable() { private 
VectorMapJoinFastHashTable createHashTable(int newThreshold) { - boolean isOuterJoin = !desc.isNoOuterJoin(); - - // UNDONE VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.getHashTableImplementationType(); HashTableKind hashTableKind = vectorDesc.getHashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.getHashTableKeyType(); + boolean isSaveNullKeyValuesForFullOuter = vectorDesc.getIsSaveNullKeyValuesForFullOuter(); boolean minMaxEnabled = vectorDesc.getMinMaxEnabled(); int writeBufferSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE); @@ -115,18 +110,23 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastLongHashMap( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + isSaveNullKeyValuesForFullOuter, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastLongHashMultiSet( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + isSaveNullKeyValuesForFullOuter, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastLongHashSet( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -135,18 +135,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastStringHashMap( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastStringHashMultiSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastStringHashSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -155,18 +154,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastMultiKeyHashMap( - isOuterJoin, + isSaveNullKeyValuesForFullOuter, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastMultiKeyHashMultiSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastMultiKeyHashSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -195,6 +193,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + 
MatchTracker matchTracker) { + throw new RuntimeException("Not applicable"); + } + + @Override public void clear() { // Do nothing } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java index 2408484..9ac93e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.serde2.WriteBuffers; /* * The interface for a single byte array key hash map lookup method. @@ -41,6 +43,9 @@ * The object to receive small table value(s) information on a MATCH. * Or, for SPILL, it has information on where to spill the big table row. * + * NOTE: Since the hash table can be shared, the hashMapResult serves as the non-shared + * private object for accessing the hash table lookup values, etc. + * * @return * Whether the lookup was a match, no match, or spill (the partition with the key * is currently spilled). @@ -48,4 +53,16 @@ JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, VectorMapJoinHashMapResult hashMapResult) throws IOException; + /* + * A version of lookup with match tracking. + * ... + * * @param matchTracker + * Optional key match tracking. + * + * NOTE: Since the hash table can be shared, the matchTracker serves as the non-shared + * private object for tracking our key matches in the hash table. + * ... + */ + JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException; } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java index 2d2490c..5762cff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java @@ -30,5 +30,4 @@ * access spill information when the partition with the key is currently spilled. */ VectorMapJoinHashMapResult createHashMapResult(); - } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java index e49da04..ce5c597 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.hadoop.hive.common.MemoryEstimate; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.io.BytesWritable; @@ -31,7 +32,6 @@ */ public interface VectorMapJoinHashTable extends MemoryEstimate { - /* * @param currentKey * The current key.
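The NOTE above is the central design point of the new tracked lookup: a hash table loaded once may be probed by many operators, so the table itself stays read-only while each prober owns a private MatchTracker and result object. A minimal probe-side sketch under those assumptions (the driver method, its name, and its parameters are illustrative, not part of the patch):

    import java.io.IOException;

    import org.apache.hadoop.hive.ql.exec.JoinUtil;
    import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
    import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap;
    import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;

    public class TrackedProbeSketch {

      // Probe one big-table key. On MATCH, the key's slot is recorded in the
      // caller's private tracker so non-matched small-table keys can be
      // iterated later; the shared hash table itself is never mutated.
      public static JoinUtil.JoinResult probeOne(VectorMapJoinBytesHashMap sharedHashMap,
          byte[] keyBytes, VectorMapJoinHashMapResult privateResult,
          MatchTracker privateTracker) throws IOException {
        return sharedHashMap.lookup(keyBytes, 0, keyBytes.length, privateResult, privateTracker);
      }
    }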
@@ -45,4 +45,10 @@ void putRow(BytesWritable currentKey, BytesWritable currentValue) * Get hash table size */ int size(); + + MatchTracker createMatchTracker(); + + VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker); + + int spillPartitionId(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java index ba68d35..c2267bf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.serde2.WriteBuffers; /* * The interface for a single long key hash map lookup method. @@ -43,4 +45,16 @@ */ JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult) throws IOException; + /* + * A version of lookup with match tracking. + * ... + * @param matchTracker + * Optional key match tracking. + * + * NOTE: Since the hash table can be shared, the matchTracker serves as the non-shared + * private object for tracking our key matches in the hash table. + * ... + */ + JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult, + MatchTracker matchTracker) throws IOException; } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java index d0f9dcb..74cfb9c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java @@ -27,5 +27,4 @@ boolean useMinMax(); long min(); long max(); - } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java new file mode 100644 index 0000000..911485b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable; + +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * The abstract class for vectorized non-match Small Table key iteration. + */ +public abstract class VectorMapJoinNonMatchedIterator { + + protected final MatchTracker matchTracker; + + protected int nonMatchedLogicalSlotNum; + + public VectorMapJoinNonMatchedIterator(MatchTracker matchTracker) { + this.matchTracker = matchTracker; + } + + public void init() { + nonMatchedLogicalSlotNum = -1; + } + + public boolean findNextNonMatched() { + throw new RuntimeException("Not implemented"); + } + + public boolean readNonMatchedLongKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public long getNonMatchedLongKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public boolean readNonMatchedBytesKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public byte[] getNonMatchedBytes() { + throw new RuntimeException("Not implemented"); + } + + public int getNonMatchedBytesOffset() { + throw new RuntimeException("Not implemented"); + } + + public int getNonMatchedBytesLength() { + throw new RuntimeException("Not implemented"); + } + + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + throw new RuntimeException("Not implemented"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java index f95cd76..21c355c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java @@ -116,16 +116,4 @@ public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, } return hashTable; } - - /* - @Override - public com.esotericsoftware.kryo.io.Output getHybridBigTableSpillOutput(int partitionId) { - - HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTableContainer; - - HashPartition hp = ht.getHashPartitions()[partitionId]; - - return hp.getMatchfileOutput(); - } - */ } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java index 9242702..50a77f3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java @@ -23,10 +23,14 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; public class VectorMapJoinOptimizedHashMap @@ -40,13 +44,18 @@ public VectorMapJoinHashMapResult createHashMapResult() { public static class HashMapResult extends VectorMapJoinHashMapResult { - private BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult; + private final BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult; public HashMapResult() { super(); bytesBytesMultiHashMapResult = new BytesBytesMultiHashMap.Result(); } + public HashMapResult(BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult) { + super(); + this.bytesBytesMultiHashMapResult = bytesBytesMultiHashMapResult; + } + public BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult() { return bytesBytesMultiHashMapResult; } @@ -106,7 +115,59 @@ public String toString() { public String getDetailedHashMapResultPositionString() { return "(Not supported yet)"; } - } + } + + protected static class NonMatchedBytesHashMapIterator + extends VectorMapJoinOptimizedNonMatchedIterator { + + private VectorMapJoinOptimizedHashMap hashMap; + + protected ByteSegmentRef keyRef; + + public NonMatchedBytesHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + nonMatchedIterator = + ((MapJoinTableContainer) hashMap.originalTableContainer). + createNonMatchedSmallTableIterator(matchTracker); + } + + public void doReadNonMatchedBytesKey() throws HiveException { + keyRef = nonMatchedIterator.getCurrentKeyAsRef(); + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + doReadNonMatchedBytesKey(); + return true; // We have not interpreted the bytes, so return true. 
+ } + + @Override + public byte[] getNonMatchedBytes() { + return keyRef.getBytes(); + } + + @Override + public int getNonMatchedBytesOffset() { + return (int) keyRef.getOffset(); + } + + @Override + public int getNonMatchedBytesLength() { + return keyRef.getLength(); + } + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedBytesHashMapIterator(matchTracker, this); + } @Override public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyOffset, int keyLength, @@ -117,7 +178,21 @@ public String getDetailedHashMapResultPositionString() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashMapResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashMapResult); + (VectorMapJoinHashTableResult) hashMapResult, null); + + return joinResult; + } + + @Override + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyOffset, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + HashMapResult implementationHashMapResult = (HashMapResult) hashMapResult; + + JoinUtil.JoinResult joinResult = + doLookup(keyBytes, keyOffset, keyLength, + implementationHashMapResult.bytesBytesMultiHashMapResult(), + (VectorMapJoinHashTableResult) hashMapResult, matchTracker); return joinResult; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java index 9921a88..cfe128c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java @@ -91,7 +91,7 @@ public void forget() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashMultiSetResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashMultiSetResult); + (VectorMapJoinHashTableResult) hashMultiSetResult, null); return joinResult; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java index 122f881..8f53ada 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java @@ -66,7 +66,7 @@ public void forget() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashSetResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashSetResult); + (VectorMapJoinHashTableResult) hashSetResult, null); return joinResult; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java index 74887f7..44108f9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java @@ -26,21 +26,26 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap; 
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerDirectAccess; import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; -/* +/** * Root abstract class for an optimized vector map join hash table (which could be a hash map, hash multi-set, or * hash set). */ -public abstract class VectorMapJoinOptimizedHashTable implements VectorMapJoinHashTable { +public abstract class VectorMapJoinOptimizedHashTable + implements VectorMapJoinHashTable, VectorMapJoinBytesHashTable { private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOptimizedMultiKeyHashMap.class.getName()); @@ -55,6 +60,16 @@ } @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override + public int spillPartitionId() { + return adapatorDirectAccess.directSpillPartitionId(); + } + + @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws SerDeException, HiveException, IOException { @@ -69,13 +84,13 @@ protected void putRowInternal(BytesWritable key, BytesWritable value) public JoinUtil.JoinResult doLookup(byte[] keyBytes, int keyOffset, int keyLength, BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult, - VectorMapJoinHashTableResult hashTableResult) { + VectorMapJoinHashTableResult hashTableResult, MatchTracker matchTracker) { hashTableResult.forget(); JoinUtil.JoinResult joinResult = adapatorDirectAccess.setDirect(keyBytes, keyOffset, keyLength, - bytesBytesMultiHashMapResult); + bytesBytesMultiHashMapResult, matchTracker); if (joinResult == JoinUtil.JoinResult.SPILL) { hashTableResult.setSpillPartitionId(adapatorDirectAccess.directSpillPartitionId()); } @@ -105,4 +120,9 @@ public long getEstimatedMemorySize() { size += (2 * JavaDataModel.get().object()); return size; } + + @Override + public MatchTracker createMatchTracker() { + return adapatorDirectAccess.createMatchTracker(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java index 9c45ed9..de1ee15 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java @@ -41,8 +41,6 @@ private HashTableKeyType hashTableKeyType; - // private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - private BinarySortableSerializeWrite keyBinarySortableSerializeWrite; private transient Output output; diff --git
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java index b21f0b3..b2c20b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java @@ -23,9 +23,17 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* * A single long value hash map based on the BytesBytesMultiHashMap. @@ -37,8 +45,104 @@ extends VectorMapJoinOptimizedHashMap implements VectorMapJoinLongHashMap { + private HashTableKeyType hashTableKeyType; + private VectorMapJoinOptimizedLongCommon longCommon; + private static class NonMatchedLongHashMapIterator + extends VectorMapJoinOptimizedNonMatchedIterator { + + private VectorMapJoinOptimizedLongHashMap hashMap; + + // Extract long with non-shared deserializer object. + private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; + + private long longValue; + + NonMatchedLongHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedLongHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + nonMatchedIterator = + ((MapJoinTableContainer) hashMap.originalTableContainer).
+ createNonMatchedSmallTableIterator(matchTracker); + + TypeInfo integerTypeInfo; + switch (hashMap.hashTableKeyType) { + case BOOLEAN: + integerTypeInfo = TypeInfoFactory.booleanTypeInfo; + break; + case BYTE: + integerTypeInfo = TypeInfoFactory.byteTypeInfo; + break; + case SHORT: + integerTypeInfo = TypeInfoFactory.shortTypeInfo; + break; + case INT: + integerTypeInfo = TypeInfoFactory.intTypeInfo; + break; + case LONG: + integerTypeInfo = TypeInfoFactory.longTypeInfo; + break; + default: + throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); + } + keyBinarySortableDeserializeRead = + new BinarySortableDeserializeRead( + new TypeInfo[] {integerTypeInfo}, false); + } + + private boolean readNonMatchedLongKey(ByteSegmentRef keyRef) throws HiveException { + + try { + byte[] keyBytes = keyRef.getBytes(); + int keyOffset = (int) keyRef.getOffset(); + int keyLength = keyRef.getLength(); + keyBinarySortableDeserializeRead.set(keyBytes, keyOffset, keyLength); + if (!keyBinarySortableDeserializeRead.readNextField()) { + return false; + } + switch (hashMap.hashTableKeyType) { + case BOOLEAN: + longValue = keyBinarySortableDeserializeRead.currentBoolean ? 1 : 0; + break; + case BYTE: + longValue = keyBinarySortableDeserializeRead.currentByte; + break; + case SHORT: + longValue = keyBinarySortableDeserializeRead.currentShort; + break; + case INT: + longValue = keyBinarySortableDeserializeRead.currentInt; + break; + case LONG: + longValue = keyBinarySortableDeserializeRead.currentLong; + break; + default: + throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); + } + } catch (IOException e) { + throw new HiveException(e); + } + return true; + } + + @Override + public boolean readNonMatchedLongKey() throws HiveException { + return readNonMatchedLongKey(nonMatchedIterator.getCurrentKeyAsRef()); + } + + @Override + public long getNonMatchedLongKey() throws HiveException { + return longValue; + } + } + @Override public boolean useMinMax() { return longCommon.useMinMax(); @@ -54,14 +158,10 @@ public long max() { return longCommon.max(); } - /* @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) - throws SerDeException, HiveException, IOException { - - longCommon.adaptPutRow((VectorMapJoinOptimizedHashTable) this, currentKey, currentValue); + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedLongHashMapIterator(matchTracker, this); } - */ @Override public JoinResult lookup(long key, @@ -73,10 +173,21 @@ public JoinResult lookup(long key, hashMapResult); } + @Override + public JoinResult lookup(long key, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = longCommon.serialize(key); + + return super.lookup(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + hashMapResult, matchTracker); + } + public VectorMapJoinOptimizedLongHashMap( boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); + this.hashTableKeyType = hashTableKeyType; longCommon = new VectorMapJoinOptimizedLongCommon(minMaxEnabled, isOuterJoin, hashTableKeyType); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java index 3e8e6fb..e07bbaa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java @@ -27,8 +27,6 @@ public class VectorMapJoinOptimizedMultiKeyHashMap extends VectorMapJoinOptimizedHashMap { - // UNDONE: How to look for all NULLs in a multi-key????? Let nulls through for now. - public VectorMapJoinOptimizedMultiKeyHashMap(boolean isOuterJoin, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java new file mode 100644 index 0000000..6f9c770 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized; + +import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedHashMap.HashMapResult; + +/** + * The abstract class for vectorized non-match Small Table key iteration. 
+ */ +public abstract class VectorMapJoinOptimizedNonMatchedIterator + extends VectorMapJoinNonMatchedIterator { + + protected NonMatchedSmallTableIterator nonMatchedIterator; + + protected HashMapResult nonMatchedHashMapResult; + + public VectorMapJoinOptimizedNonMatchedIterator(MatchTracker matchTracker) { + super(matchTracker); + } + + @Override + public boolean findNextNonMatched() { + return nonMatchedIterator.isNext(); + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + if (nonMatchedHashMapResult == null) { + nonMatchedHashMapResult = new HashMapResult(nonMatchedIterator.getHashMapResult()); + } + nonMatchedHashMapResult.setJoinResult(JoinResult.MATCH); + return nonMatchedHashMapResult; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java index a8ccfa4..da0e836 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java @@ -26,19 +26,9 @@ /* * An single byte array value hash map based on the BytesBytesMultiHashMap. - * - * Since BytesBytesMultiHashMap does not interpret the key as BinarySortable we optimize - * this case and just reference the byte array key directly for the lookup instead of serializing - * the byte array into BinarySortable. We rely on it just doing byte array equality comparisons. */ public class VectorMapJoinOptimizedStringCommon { - // private boolean isOuterJoin; - - // private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - - // private ReadStringResults readStringResults; - private BinarySortableSerializeWrite keyBinarySortableSerializeWrite; private transient Output output; @@ -55,18 +45,13 @@ public SerializedBytes serialize(byte[] keyBytes, int keyStart, int keyLength) t serializedBytes.length = output.getLength(); return serializedBytes; - } public VectorMapJoinOptimizedStringCommon(boolean isOuterJoin) { - // this.isOuterJoin = isOuterJoin; - // PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo }; - // keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos); - // readStringResults = keyBinarySortableDeserializeRead.createReadStringResults(); - // bytesWritable = new BytesWritable(); + keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(1); output = new Output(); keyBinarySortableSerializeWrite.set(output); serializedBytes = new SerializedBytes(); } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java index f2074ec..fed8628 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java @@ -22,12 +22,19 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* - * An multi-key hash map based on the BytesBytesMultiHashMap. + * A string hash map based on the BytesBytesMultiHashMap. */ public class VectorMapJoinOptimizedStringHashMap extends VectorMapJoinOptimizedHashMap @@ -35,14 +42,59 @@ private VectorMapJoinOptimizedStringCommon stringCommon; - /* - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) - throws SerDeException, HiveException, IOException { + private static class NonMatchedStringHashMapIterator extends NonMatchedBytesHashMapIterator { + + private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; + + NonMatchedStringHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedStringHashMap hashMap) { + super(matchTracker, hashMap); + } + + @Override + public void init() { + super.init(); + + TypeInfo[] typeInfos = new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + keyBinarySortableDeserializeRead = + new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */ false); + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + super.doReadNonMatchedBytesKey(); - stringCommon.adaptPutRow((VectorMapJoinOptimizedHashTable) this, currentKey, currentValue); + byte[] bytes = keyRef.getBytes(); + final int keyOffset = (int) keyRef.getOffset(); + final int keyLength = keyRef.getLength(); + try { + keyBinarySortableDeserializeRead.set(bytes, keyOffset, keyLength); + return keyBinarySortableDeserializeRead.readNextField(); + } catch (IOException e) { + throw new HiveException(e); + } + } + + @Override + public byte[] getNonMatchedBytes() { + return keyBinarySortableDeserializeRead.currentBytes; + } + + @Override + public int getNonMatchedBytesOffset() { + return keyBinarySortableDeserializeRead.currentBytesStart; + } + + @Override + public int getNonMatchedBytesLength() { + return keyBinarySortableDeserializeRead.currentBytesLength; + } + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedStringHashMapIterator(matchTracker, this); } - */ @Override public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, @@ -55,6 +107,17 @@ public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, } + @Override + public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = stringCommon.serialize(keyBytes, keyStart, keyLength); + + return super.lookup(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + hashMapResult, matchTracker); + + } + public VectorMapJoinOptimizedStringHashMap(boolean isOuterJoin, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter);
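Taken together, the iterators above implement the close phase of FULL OUTER MapJoin: after the big table is exhausted, the operator walks the small-table keys that were never matched and emits them with NULL big-table columns. Because the optimized tables store keys BinarySortable-encoded, the long and string iterators must deserialize each key before handing it back. A close-phase sketch for a bytes-keyed table, using only methods introduced in this patch (the emit step is left as a comment since it depends on the surrounding operator):

    import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
    import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
    import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable;
    import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
    import org.apache.hadoop.hive.ql.metadata.HiveException;

    public class NonMatchedCloseSketch {

      // Walk every small-table key the tracker never recorded a match for.
      public static void emitNonMatched(VectorMapJoinHashTable hashTable,
          MatchTracker matchTracker) throws HiveException {
        VectorMapJoinNonMatchedIterator it = hashTable.createNonMatchedIterator(matchTracker);
        it.init();
        while (it.findNextNonMatched()) {
          if (it.readNonMatchedBytesKey()) {
            byte[] keyBytes = it.getNonMatchedBytes();
            int keyOffset = it.getNonMatchedBytesOffset();
            int keyLength = it.getNonMatchedBytesLength();
            VectorMapJoinHashMapResult values = it.getNonMatchedHashMapResult();
            // Emit (key, values) with NULL big-table columns here.
          }
        }
      }
    }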
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java index ab97b3a..e17a17f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java @@ -515,6 +515,9 @@ protected MapJoinOperator convertJoinToBucketMapJoin( joinContext.getBigTablePosition(), false, false); + if (mapJoinOp == null) { + return null; + } // Remove the join operator from the query join context // Data structures coming from QBJoinTree mapJoinOp.getConf().setQBJoinTreeProps(joinOp.getConf()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 52855e0..694fe43 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -141,8 +141,9 @@ numBuckets = 1; } LOG.info("Estimated number of buckets " + numBuckets); - int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets, false, maxJoinMemory, true); - if (mapJoinConversionPos < 0) { + MapJoinConversion mapJoinConversion = + getMapJoinConversion(joinOp, context, numBuckets, false, maxJoinMemory, true); + if (mapJoinConversion == null) { Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx); if (retval == null) { return retval; @@ -158,10 +159,10 @@ if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) { // Check if we are in LLAP, if so it needs to be determined if we should use BMJ or DPHJ if (llapInfo != null) { - if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversionPos, numBuckets)) { + if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversion, numBuckets)) { return null; } - } else if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) { + } else if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx)) { return null; } } @@ -170,16 +171,27 @@ // check if we can convert to map join no bucket scaling. LOG.info("Convert to non-bucketed map join"); if (numBuckets != 1) { - mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1, false, maxJoinMemory, true); + mapJoinConversion = getMapJoinConversion(joinOp, context, 1, false, maxJoinMemory, true); } - if (mapJoinConversionPos < 0) { + if (mapJoinConversion == null) { // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. fallbackToReduceSideJoin(joinOp, context); return null; } - MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos, true); + // Currently, this is an MJ path and we don't support FULL OUTER MapJoin yet.
+ if (mapJoinConversion.getIsFullOuterJoin() && + !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) { + fallbackToReduceSideJoin(joinOp, context); + return null; + } + + MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true); + if (mapJoinOp == null) { + fallbackToReduceSideJoin(joinOp, context); + return null; + } // map join operator by default has no bucket cols and num of reduce sinks // reduced by 1 mapJoinOp.setOpTraits(new OpTraits(null, -1, null, @@ -196,11 +208,11 @@ private boolean selectJoinForLlap(OptimizeTezProcContext context, JoinOperator joinOp, TezBucketJoinProcCtx tezBucketJoinProcCtx, LlapClusterStateForCompile llapInfo, - int mapJoinConversionPos, int numBuckets) throws SemanticException { + MapJoinConversion mapJoinConversion, int numBuckets) throws SemanticException { if (!context.conf.getBoolVar(HiveConf.ConfVars.HIVEDYNAMICPARTITIONHASHJOIN) && numBuckets > 1) { // DPHJ is disabled, only attempt BMJ or mapjoin - return convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx); + return convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx); } int numExecutorsPerNode = -1; @@ -216,6 +228,7 @@ private boolean selectJoinForLlap(OptimizeTezProcContext context, JoinOperator j LOG.debug("Number of nodes = " + numNodes + ". Number of Executors per node = " + numExecutorsPerNode); // Determine the size of small table inputs + final int mapJoinConversionPos = mapJoinConversion.getBigTablePos(); long totalSize = 0; for (int pos = 0; pos < joinOp.getParentOperators().size(); pos++) { if (pos == mapJoinConversionPos) { @@ -244,7 +257,7 @@ private boolean selectJoinForLlap(OptimizeTezProcContext context, JoinOperator j return convertJoinDynamicPartitionedHashJoin(joinOp, context); } else if (numBuckets > 1) { LOG.info("Bucket Map Join chosen"); - return convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx); + return convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx); } // fallback to mapjoin no bucket scaling LOG.info("Falling back to mapjoin no bucket scaling"); @@ -484,8 +497,13 @@ private void setAllChildrenTraits(Operator currentOp, Op } private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, - int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException { + MapJoinConversion mapJoinConversion, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException { + if (mapJoinConversion.getIsFullOuterJoin() && + !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) { + return false; + } + final int bigTablePosition = mapJoinConversion.getBigTablePos(); if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) { LOG.info("Check conversion to bucket map join failed."); return false; @@ -517,7 +535,7 @@ private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcCon } } - MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition, true); + MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true); if (mapJoinOp == null) { LOG.debug("Conversion to bucket map join failed."); return false; @@ -838,6 +856,43 @@ private boolean isCrossProduct(JoinOperator joinOp) { return true; } + public static class MapJoinConversion { + + private final int bigTablePos; + + private final boolean isFullOuterJoin; + private final boolean isFullOuterEnabledForDynamicPartitionHashJoin; 
+ private final boolean isFullOuterEnabledForMapJoin; + + public MapJoinConversion(int bigTablePos, boolean isFullOuterJoin, + boolean isFullOuterEnabledForDynamicPartitionHashJoin, boolean isFullOuterEnabledForMapJoin) { + this.bigTablePos = bigTablePos; + + this.isFullOuterJoin = isFullOuterJoin; + this.isFullOuterEnabledForDynamicPartitionHashJoin = isFullOuterEnabledForDynamicPartitionHashJoin; + this.isFullOuterEnabledForMapJoin = isFullOuterEnabledForMapJoin; + } + + public int getBigTablePos() { + return bigTablePos; + } + + /* + * Do we have a single FULL OUTER JOIN here? + */ + public boolean getIsFullOuterJoin() { + return isFullOuterJoin; + } + + public boolean getIsFullOuterEnabledForDynamicPartitionHashJoin() { + return isFullOuterEnabledForDynamicPartitionHashJoin; + } + + public boolean getIsFullOuterEnabledForMapJoin() { + return isFullOuterEnabledForMapJoin; + } + } + /** * Obtain big table position for join. * @@ -851,9 +906,11 @@ private boolean isCrossProduct(JoinOperator joinOp) { * @return the MapJoinConversion (with the big table position), or null if conversion cannot be determined * @throws SemanticException */ - public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext context, + public MapJoinConversion getMapJoinConversion(JoinOperator joinOp, OptimizeTezProcContext context, int buckets, boolean skipJoinTypeChecks, long maxSize, boolean checkMapJoinThresholds) throws SemanticException { + JoinDesc joinDesc = joinOp.getConf(); + JoinCondDesc[] conds = joinDesc.getConds(); if (!skipJoinTypeChecks) { /* * HIVE-9038: Join tests fail in tez when we have more than 1 join on the same key and there is * new operation to be able to support this. This seems like a corner case enough to special * case this for now. */ - if (joinOp.getConf().getConds().length > 1) { + if (conds.length > 1) { if (hasOuterJoin(joinOp)) { - return -1; + return null; } } } + + // Assume disabled; the checks below may turn these on. + boolean isFullOuterEnabledForDynamicPartitionHashJoin = false; + boolean isFullOuterEnabledForMapJoin = false; + + boolean isFullOuterJoin = + MapJoinProcessor.precheckFullOuter(context.conf, joinOp); + if (isFullOuterJoin) { + + boolean isFullOuterEnabled = MapJoinProcessor.isFullOuterMapEnabled(context.conf, joinOp); + if (isFullOuterEnabled) { + + // FUTURE: Currently, we only support DPHJ. + isFullOuterEnabledForDynamicPartitionHashJoin = + MapJoinProcessor.isFullOuterEnabledForDynamicPartitionHashJoin(context.conf, joinOp); + } + } + Set bigTableCandidateSet = - MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds()); + MapJoinProcessor.getBigTableCandidates(conds, /* isSupportFullOuter */ true); int bigTablePosition = -1; // big input cumulative row count long bigInputCumulativeCardinality = -1L; @@ -892,7 +967,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c Statistics currInputStat = parentOp.getStatistics(); if (currInputStat == null) { LOG.warn("Couldn't get statistics from: " + parentOp); - return -1; + return null; } long inputSize = computeOnlineDataSize(currInputStat); @@ -905,14 +980,14 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c if (foundInputNotFittingInMemory) { // cannot convert to map join; we've already chosen a big table // on size and there's another one that's bigger.
- return -1; + return null; } if (inputSize/buckets > maxSize) { if (!bigTableCandidateSet.contains(pos)) { // can't use the current table as the big table, but it's too // big for the map side. - return -1; + return null; } currentInputNotFittingInMemory = true; @@ -927,7 +1002,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c Long cardinality = computeCumulativeCardinality(parentOp); if (cardinality == null) { // We could not get stats, we cannot convert - return -1; + return null; } currentInputCumulativeCardinality = cardinality; } @@ -966,7 +1041,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c if (totalSize/buckets > maxSize) { // sum of small tables size in this join exceeds configured limit // hence cannot convert. - return -1; + return null; } if (selectedBigTable) { @@ -981,7 +1056,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c if (checkMapJoinThresholds && convertDPHJ && checkShuffleSizeForLargeTable(joinOp, bigTablePosition, context)) { LOG.debug("Conditions to convert to MapJoin are not met"); - return -1; + return null; } // only allow cross product in map joins if build side is 'small' @@ -995,7 +1070,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c HiveConf.getIntVar(context.conf, HiveConf.ConfVars.XPRODSMALLTABLEROWSTHRESHOLD)) { // if any of smaller side is estimated to generate more than // threshold rows we would disable mapjoin - return -1; + return null; } } } @@ -1006,7 +1081,12 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c // equal to sum of small tables size. joinOp.getConf().setInMemoryDataSize(totalSize/buckets); - return bigTablePosition; + return + new MapJoinConversion( + bigTablePosition, + isFullOuterJoin, + isFullOuterEnabledForDynamicPartitionHashJoin, + isFullOuterEnabledForMapJoin); } // This is akin to CBO cumulative cardinality model @@ -1055,7 +1135,8 @@ private static Long computeCumulativeCardinality(Operator parentOp : joinOp.getParentOperators()) { @@ -1065,15 +1146,20 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo } // can safely convert the join to a map join. 
+ final int bigTablePosition = mapJoinConversion.getBigTablePos(); MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink); - mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf, + if (mapJoinOp == null) { + return null; + } + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + mapJoinDesc.setHybridHashJoin(HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)); - List joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next(); + List joinExprs = mapJoinDesc.getKeys().values().iterator().next(); if (joinExprs.size() == 0) { // In case of cross join, we disable hybrid grace hash join - mapJoinOp.getConf().setHybridHashJoin(false); + mapJoinDesc.setHybridHashJoin(false); } Operator parentBigTableOp = @@ -1281,18 +1367,29 @@ private boolean convertJoinDynamicPartitionedHashJoin(JoinOperator joinOp, Optim // Since we don't have big table index yet, must start with estimate of numReducers int numReducers = estimateNumBuckets(joinOp, false); LOG.info("Try dynamic partitioned hash join with estimated " + numReducers + " reducers"); - int bigTablePos = getMapJoinConversionPos(joinOp, context, numReducers, false, maxJoinMemory, false); - if (bigTablePos >= 0) { + MapJoinConversion mapJoinConversion = + getMapJoinConversion(joinOp, context, numReducers, false, maxJoinMemory, false); + if (mapJoinConversion != null) { + if (mapJoinConversion.getIsFullOuterJoin() && + !mapJoinConversion.getIsFullOuterEnabledForDynamicPartitionHashJoin()) { + return false; + } + final int bigTablePos = mapJoinConversion.getBigTablePos(); + // Now that we have the big table index, get real numReducers value based on big table RS ReduceSinkOperator bigTableParentRS = (ReduceSinkOperator) (joinOp.getParentOperators().get(bigTablePos)); numReducers = bigTableParentRS.getConf().getNumReducers(); LOG.debug("Real big table reducers = " + numReducers); - MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePos, false); + MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, false); if (mapJoinOp != null) { LOG.info("Selected dynamic partitioned hash join"); - mapJoinOp.getConf().setDynamicPartitionHashJoin(true); + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + mapJoinDesc.setDynamicPartitionHashJoin(true); + if (mapJoinConversion.getIsFullOuterJoin()) { + FullOuterMapJoinOptimization.removeFilterMap(mapJoinDesc); + } // Set OpTraits for dynamically partitioned hash join: // bucketColNames: Re-use previous joinOp's bucketColNames. Parent operators should be // reduce sink, which should have bucket columns based on the join keys. @@ -1333,11 +1430,15 @@ private void fallbackToReduceSideJoin(JoinOperator joinOp, OptimizeTezProcContex private void fallbackToMergeJoin(JoinOperator joinOp, OptimizeTezProcContext context) throws SemanticException { - int pos = getMapJoinConversionPos(joinOp, context, estimateNumBuckets(joinOp, false), - true, Long.MAX_VALUE, false); - if (pos < 0) { + MapJoinConversion mapJoinConversion = + getMapJoinConversion( + joinOp, context, estimateNumBuckets(joinOp, false), true, Long.MAX_VALUE, false); + final int pos; + if (mapJoinConversion == null || mapJoinConversion.getBigTablePos() == -1) { LOG.info("Could not get a valid join position. 
Defaulting to position 0"); pos = 0; + } else { + pos = mapJoinConversion.getBigTablePos(); } LOG.info("Fallback to common merge join operator"); convertJoinSMBJoin(joinOp, context, pos, 0, false); @@ -1360,7 +1461,6 @@ private boolean checkNumberOfEntriesForHashTable(JoinOperator joinOp, int positi for (String key : keys) { ColStatistics cs = inputStats.getColumnStatisticsFromColName(key); if (cs == null) { - LOG.debug("Couldn't get statistics for: {}", key); return true; } columnStats.add(cs); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java new file mode 100644 index 0000000..f9135b5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.base.Joiner; + +/** + * FULL OUTER MapJoin planning. + */ +public class FullOuterMapJoinOptimization { + + public static void removeFilterMap(MapJoinDesc mapJoinDesc) throws SemanticException { + int[][] filterMaps = mapJoinDesc.getFilterMap(); + if (filterMaps == null) { + return; + } + final byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); + final int numAliases = mapJoinDesc.getExprs().size(); + List valueFilteredTblDescs = mapJoinDesc.getValueFilteredTblDescs(); + for (byte pos = 0; pos < numAliases; pos++) { + if (pos != posBigTable) { + int[] filterMap = filterMaps[pos]; + TableDesc tableDesc = valueFilteredTblDescs.get(pos); + Properties properties = tableDesc.getProperties(); + String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS); + String columnNameDelimiter = + properties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? 
+ properties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : + String.valueOf(SerDeUtils.COMMA); + + String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES); + List columnNameList; + if (columnNameProperty.length() == 0) { + columnNameList = new ArrayList(); + } else { + columnNameList = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); + } + List truncatedColumnNameList = columnNameList.subList(0, columnNameList.size() - 1); + String truncatedColumnNameProperty = + Joiner.on(columnNameDelimiter).join(truncatedColumnNameList); + + List columnTypeList; + if (columnTypeProperty.length() == 0) { + columnTypeList = new ArrayList(); + } else { + columnTypeList = TypeInfoUtils + .getTypeInfosFromTypeString(columnTypeProperty); + } + if (!columnTypeList.get(columnTypeList.size() - 1).equals(TypeInfoFactory.shortTypeInfo)) { + throw new SemanticException("Expecting filterTag smallint as last column type"); + } + List truncatedColumnTypeList = + columnTypeList.subList(0, columnTypeList.size() - 1); + String truncatedColumnTypeProperty = + Joiner.on(",").join(truncatedColumnTypeList); + + properties.setProperty(serdeConstants.LIST_COLUMNS, truncatedColumnNameProperty); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, truncatedColumnTypeProperty); + } + } + mapJoinDesc.setFilterMap(null); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index bae80f3..5babe16 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -26,6 +26,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.Stack; @@ -48,6 +49,7 @@ import org.apache.hadoop.hive.ql.exec.ScriptOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; import org.apache.hadoop.hive.ql.lib.GraphWalker; @@ -56,6 +58,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.EnabledOverride; import org.apache.hadoop.hive.ql.parse.GenMapRedWalker; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -75,10 +78,13 @@ import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Preconditions; + /** * Implementation of one of the rule-based map join optimization. 
User passes hints to specify * map-joins and during this optimization, all user specified map joins are converted to MapJoins - @@ -336,6 +342,9 @@ public MapJoinOperator convertMapJoin(HiveConf conf, // create the map-join operator MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin); + if (mapJoinOp == null) { + return null; + } // remove old parents for (pos = 0; pos < newParentOps.size(); pos++) { @@ -356,6 +365,201 @@ public MapJoinOperator convertMapJoin(HiveConf conf, return mapJoinOp; } + public static boolean onExpressionHasNullSafes(JoinDesc desc) { + boolean[] nullSafes = desc.getNullSafes(); + if (nullSafes == null) { + return false; + } + for (boolean nullSafe : nullSafes) { + if (nullSafe) { + return true; + } + } + return false; + } + + private static boolean checkFullOuterMapJoinCompatible(HiveConf hiveConf, + JoinOperator joinOp) throws SemanticException { + JoinDesc joinDesc = joinOp.getConf(); + + // Make sure all key and value expressions are columns. + for (Entry> mapEntry : joinDesc.getExprs().entrySet()) { + List exprList = mapEntry.getValue(); + for (ExprNodeDesc expr : exprList) { + if (!(expr instanceof ExprNodeColumnDesc)) { + LOG.debug("FULL OUTER MapJoin: only column expressions are supported " + expr.toString()); + return false; + } + } + } + + // Check for supported key data types. + Byte[] order = joinDesc.getTagOrder(); + ExprNodeDesc[][] joinKeysArray = joinDesc.getJoinKeys(); + for (int i = 0; i < order.length; i++) { + byte pos = order[i]; + ExprNodeDesc[] keyExprs = joinKeysArray[pos]; + for (ExprNodeDesc keyExpr : keyExprs) { + TypeInfo typeInfo = keyExpr.getTypeInfo(); + + // Verify we handle the key column types for an optimized table. This is the effectively + // the same check used in Tez HashTableLoader. + if (!MapJoinKey.isSupportedField(typeInfo)) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + " key type " + typeInfo.toString() + " not supported"); + } + return false; + } + } + } + + if (onExpressionHasNullSafes(joinDesc)) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + "nullsafe not supported"); + } + return false; + } + + if (joinDesc.getResidualFilterExprs() != null && + joinDesc.getResidualFilterExprs().size() != 0) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + "non-equi joins not supported"); + } + return false; + } + + return true; + } + + public static boolean precheckFullOuter(HiveConf hiveConf, JoinOperator joinOp) + throws SemanticException { + JoinDesc joinDesc = joinOp.getConf(); + JoinCondDesc[] conds = joinDesc.getConds(); + + /* + * Are we even being asked to do a FULL OUTER JOIN? + */ + boolean hasFullOuterJoin = false; + for (JoinCondDesc cond : conds) { + if (cond.getType() == JoinDesc.FULL_OUTER_JOIN) { + hasFullOuterJoin = true; + break; + } + } + if (!hasFullOuterJoin) { + return false; + } + + if (conds.length > 1) { + + // No multiple condition FULL OUTER MapJoin. 
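For readers, the precheck here boils down to one rule: the plan must contain a FULL OUTER condition and exactly one join condition overall. A standalone sketch of that rule, using toy int constants rather than the real JoinDesc/JoinCondDesc classes:

```java
// Toy model of precheckFullOuter(): a FULL OUTER join is only eligible for
// MapJoin conversion when it is the sole join condition in the plan.
// INNER_JOIN/FULL_OUTER_JOIN are illustrative stand-ins, not Hive constants.
public class FullOuterPrecheckSketch {
  static final int INNER_JOIN = 0;
  static final int FULL_OUTER_JOIN = 1;

  static boolean precheckFullOuter(int[] condTypes) {
    boolean hasFullOuterJoin = false;
    for (int type : condTypes) {
      if (type == FULL_OUTER_JOIN) {
        hasFullOuterJoin = true;
        break;
      }
    }
    // Reject plans with no FULL OUTER condition, or with multiple conditions.
    return hasFullOuterJoin && condTypes.length == 1;
  }

  public static void main(String[] args) {
    System.out.println(precheckFullOuter(new int[] { FULL_OUTER_JOIN }));              // true
    System.out.println(precheckFullOuter(new int[] { FULL_OUTER_JOIN, INNER_JOIN }));  // false
    System.out.println(precheckFullOuter(new int[] { INNER_JOIN }));                   // false
  }
}
```

A multi-way query such as `a FULL OUTER JOIN b ... JOIN c ...` would produce more than one condition and therefore takes the merge join fallback instead.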
+ if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: multiple JOIN conditions not supported"); + } + return false; + } + + return true; + } + + public static boolean isFullOuterMapEnabled(HiveConf hiveConf, JoinOperator joinOp) + throws SemanticException { + + final String testMapJoinFullOuterOverrideString = + HiveConf.getVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_MAPJOINFULLOUER_OVERRIDE); + EnabledOverride mapJoinFullOuterOverride = + EnabledOverride.nameMap.get(testMapJoinFullOuterOverrideString); + + final boolean isEnabled = + HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVEMAPJOINFULLOUER); + switch (mapJoinFullOuterOverride) { + case NONE: + { + if (!isEnabled) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + HiveConf.ConfVars.HIVEMAPJOINFULLOUER.varname + " is false"); + } + return false; + } + } + break; + case DISABLE: + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + HiveConf.ConfVars.HIVE_TEST_MAPJOINFULLOUER_OVERRIDE.varname + " is disable (" + + " " + HiveConf.ConfVars.HIVEMAPJOINFULLOUER.varname + " is " + isEnabled + ")"); + } + return false; + case ENABLE: + + // Different parts of the code may rely on this being set... + HiveConf.setBoolVar(hiveConf, + HiveConf.ConfVars.HIVEMAPJOINFULLOUER, true); + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin is enabled: " + + HiveConf.ConfVars.HIVE_TEST_MAPJOINFULLOUER_OVERRIDE.varname + " is enable (" + + " " + HiveConf.ConfVars.HIVEMAPJOINFULLOUER.varname + " is " + isEnabled + ")"); + } + break; + default: + throw new RuntimeException("Unexpected mapjoin full outer override " + + mapJoinFullOuterOverride); + } + + final String engine = + HiveConf.getVar( + hiveConf, + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); + final boolean isTezEngine = engine.equalsIgnoreCase("tez"); + if (!isTezEngine) { + + // Only Tez for now. + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: Only Tez engine supported"); + } + return false; + } + + /* + * Optimized Hash Table (i.e. not old-style MR HashMap).
+ */ + final boolean isOptimizedHashTableEnabled = + HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE); + if (!isOptimizedHashTableEnabled) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE.varname + " is false"); + } + return false; + } + + boolean isCompatibleFullOuterMapJoin = checkFullOuterMapJoinCompatible(hiveConf, joinOp); + if (!isCompatibleFullOuterMapJoin) { + return false; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin enabled"); + } + return true; + } + + public static boolean isFullOuterEnabledForDynamicPartitionHashJoin(HiveConf hiveConf, JoinOperator joinOp) + throws SemanticException { + JoinDesc joinDesc = joinOp.getConf(); + + return true; + } + public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List mapAliases, int mapJoinPos, boolean noCheckOuterJoin) throws SemanticException { @@ -371,6 +575,9 @@ public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, MapJoinDesc mapJoinDescriptor = getMapJoinDesc(hconf, op, leftInputJoin, baseSrc, mapAliases, mapJoinPos, noCheckOuterJoin, adjustParentsChildren); + if (mapJoinDescriptor == null) { + return null; + } // reduce sink row resolver used to generate map join op RowSchema outputRS = op.getSchema(); @@ -474,6 +681,9 @@ public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator o MapJoinOperator mapJoinOp = convertMapJoin(pctx.getConf(), op, op.getConf().isLeftInputJoin(), op.getConf().getBaseSrc(), op.getConf().getMapAliases(), mapJoinPos, noCheckOuterJoin, true); + if (mapJoinOp == null) { + return null; + } // create a dummy select to select all columns genSelectPlan(pctx, mapJoinOp); @@ -502,8 +712,29 @@ public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator o * @return set of big table candidates */ public static Set getBigTableCandidates(JoinCondDesc[] condns) { + return getBigTableCandidates(condns, /* isSupportFullOuter */ false); + } + + public static Set getBigTableCandidates(JoinCondDesc[] condns, + boolean isSupportFullOuter) { + Set bigTableCandidates = new HashSet(); + if (condns.length == 1) { + JoinCondDesc condn = condns[0]; + if (condn.getType() == JoinDesc.FULL_OUTER_JOIN) { + + if (!isSupportFullOuter) { + return new HashSet(); + } + + // FULL OUTER MapJoin must be a single condition. + bigTableCandidates.add(condn.getLeft()); + bigTableCandidates.add(condn.getRight()); + return bigTableCandidates; + } + } + boolean seenOuterJoin = false; Set seenPostitions = new HashSet(); Set leftPosListOfLastRightOuterJoin = new HashSet(); @@ -512,17 +743,14 @@ public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator o boolean lastSeenRightOuterJoin = false; for (JoinCondDesc condn : condns) { int joinType = condn.getType(); + if (joinType == JoinDesc.FULL_OUTER_JOIN) { + return new HashSet(); + } + seenPostitions.add(condn.getLeft()); seenPostitions.add(condn.getRight()); - if (joinType == JoinDesc.FULL_OUTER_JOIN) { - // setting these 2 parameters here just in case that if the code got - // changed in future, these 2 are not missing. 
- seenOuterJoin = true; - lastSeenRightOuterJoin = false; - // empty set - cannot convert - return new HashSet(); - } else if (joinType == JoinDesc.LEFT_OUTER_JOIN + if (joinType == JoinDesc.LEFT_OUTER_JOIN || joinType == JoinDesc.LEFT_SEMI_JOIN) { seenOuterJoin = true; if(bigTableCandidates.size() == 0) { @@ -564,7 +792,8 @@ public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator o * @throws SemanticException if given position is not in the big table candidates. */ public static int checkMapJoin(int mapJoinPos, JoinCondDesc[] condns) { - Set bigTableCandidates = MapJoinProcessor.getBigTableCandidates(condns); + Set bigTableCandidates = + MapJoinProcessor.getBigTableCandidates(condns, /* isSupportFullOuter */ true); // bigTableCandidates can never be null if (!bigTableCandidates.contains(mapJoinPos)) { @@ -1180,6 +1409,9 @@ public static MapJoinDesc getMapJoinDesc(HiveConf hconf, } List keyCols = keyExprMap.get((byte) mapJoinPos); + if (keyCols == null) { + return null; + } List outputColumnNames = op.getConf().getOutputColumnNames(); TableDesc keyTableDesc = diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java index 16d088a..a5400d6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java @@ -64,6 +64,9 @@ public MapJoinOperator convertMapJoin(HiveConf conf, MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, op.getConf().isLeftInputJoin(), op.getConf().getBaseSrc(), op.getConf().getMapAliases(), bigTablePos, noCheckOuterJoin); + if (mapJoinOp == null) { + return null; + } // 1. remove RS as parent for the big table branch // 2. 
remove old join op from child set of all the RSs diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 48974f8..31df289 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -73,6 +73,9 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterLongOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterMultiKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterStringOperator; import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -122,6 +125,7 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MergeJoinWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; @@ -289,15 +293,15 @@ private HiveConf hiveConf; - public static enum VectorizationEnabledOverride { + public static enum EnabledOverride { NONE, DISABLE, ENABLE; - public final static Map nameMap = - new HashMap(); + public final static Map nameMap = + new HashMap(); static { - for (VectorizationEnabledOverride vectorizationEnabledOverride : values()) { + for (EnabledOverride vectorizationEnabledOverride : values()) { nameMap.put( vectorizationEnabledOverride.name().toLowerCase(), vectorizationEnabledOverride); } @@ -305,7 +309,7 @@ } boolean isVectorizationEnabled; - private VectorizationEnabledOverride vectorizationEnabledOverride; + private EnabledOverride vectorizationEnabledOverride; boolean isTestForcedVectorizationEnable; private boolean useVectorizedInputFileFormat; @@ -725,10 +729,63 @@ public VectorDesc getVectorDesc() { } } - private List> newOperatorList() { + private static List> newOperatorList() { return new ArrayList>(); } + public static void debugDisplayJoinOperatorTree(Operator joinOperator, + String prefix) { + List> currentParentList = newOperatorList(); + currentParentList.add(joinOperator); + + int depth = 0; + do { + List> nextParentList = newOperatorList(); + + final int count = currentParentList.size(); + for (int i = 0; i < count; i++) { + Operator parent = currentParentList.get(i); + System.out.println(prefix + " parent depth " + depth + " " + + parent.getClass().getSimpleName() + " " + parent.toString()); + + List> parentList = parent.getParentOperators(); + if (parentList == null || parentList.size() == 0) { + continue; + } + + nextParentList.addAll(parentList); + } + + currentParentList = nextParentList; + depth--; + } while (currentParentList.size() > 0); + + List> currentChildList = newOperatorList(); + currentChildList.addAll(joinOperator.getChildOperators()); + + depth = 1; + do { + List> nextChildList = newOperatorList(); + + final int count = currentChildList.size(); + for (int i = 0; i < count; i++) { + Operator child = currentChildList.get(i); + 
System.out.println(prefix + " child depth " + depth + " " + + child.getClass().getSimpleName() + " " + child.toString()); + + List> childList = child.getChildOperators(); + if (childList == null || childList.size() == 0) { + continue; + } + + nextChildList.addAll(childList); + } + + currentChildList = nextChildList; + depth++; + } while (currentChildList.size() > 0); + } + private Operator validateAndVectorizeOperatorTree( Operator nonVecRootOperator, boolean isReduce, boolean isTezOrSpark, @@ -973,7 +1030,15 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) if (isReduceVectorizationEnabled) { convertReduceWork(reduceWork); } + logReduceWorkExplainVectorization(reduceWork); + } else if (baseWork instanceof MergeJoinWork) { + MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork; + + // Always set the EXPLAIN conditions. + setMergeJoinWorkExplainConditions(mergeJoinWork); + + logMergeJoinWorkExplainVectorization(mergeJoinWork); } } } else if (currTask instanceof SparkTask) { @@ -1029,6 +1094,11 @@ private void setReduceWorkExplainConditions(ReduceWork reduceWork) { HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE)); } + private void setMergeJoinWorkExplainConditions(MergeJoinWork mergeJoinWork) { + + setExplainConditions(mergeJoinWork); + } + private boolean logExplainVectorization(BaseWork baseWork, String name) { if (!baseWork.getVectorizationExamined()) { @@ -1101,6 +1171,13 @@ private void logReduceWorkExplainVectorization(ReduceWork reduceWork) { LOG.info("Reducer engine: " + reduceWork.getVectorReduceEngine()); } + private void logMergeJoinWorkExplainVectorization(MergeJoinWork mergeJoinWork) { + + if (!logExplainVectorization(mergeJoinWork, "MergeJoin")) { + return; + } + } + private void convertMapWork(MapWork mapWork, boolean isTezOrSpark) throws SemanticException { // We have to evaluate the input format to see if vectorization is enabled, so @@ -2319,7 +2396,7 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_ENABLED_OVERRIDE); vectorizationEnabledOverride = - VectorizationEnabledOverride.nameMap.get(vectorizationEnabledOverrideString); + EnabledOverride.nameMap.get(vectorizationEnabledOverrideString); isVectorizationEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); @@ -3264,7 +3341,7 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE; HashTableKind hashTableKind = HashTableKind.NONE; HashTableKeyType hashTableKeyType = HashTableKeyType.NONE; - VectorMapJoinVariation vectorMapJoinVariation = VectorMapJoinVariation.NONE; + VectorMapJoinVariation vectorMapJoinVariation = null; if (vectorDesc.getIsFastHashTableEnabled()) { hashTableImplementationType = HashTableImplementationType.FAST; @@ -3334,6 +3411,10 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { vectorMapJoinVariation = VectorMapJoinVariation.OUTER; hashTableKind = HashTableKind.HASH_MAP; break; + case JoinDesc.FULL_OUTER_JOIN: + vectorMapJoinVariation = VectorMapJoinVariation.FULL_OUTER; + hashTableKind = HashTableKind.HASH_MAP; + break; case JoinDesc.LEFT_SEMI_JOIN: vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI; hashTableKind = HashTableKind.HASH_SET; @@ -3363,6 +3444,9 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterLongOperator.class; break; + case FULL_OUTER: + opClass =
VectorMapJoinFullOuterLongOperator.class; + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3381,6 +3465,9 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterStringOperator.class; break; + case FULL_OUTER: + opClass = VectorMapJoinFullOuterStringOperator.class; + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3399,6 +3486,9 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterMultiKeyOperator.class; break; + case FULL_OUTER: + opClass = VectorMapJoinFullOuterMultiKeyOperator.class; + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3414,6 +3504,10 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { vectorDesc.setHashTableKind(hashTableKind); vectorDesc.setHashTableKeyType(hashTableKeyType); vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + + vectorDesc.setIsSaveNullKeyValuesForFullOuter(true); + } vectorDesc.setMinMaxEnabled(minMaxEnabled); vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); @@ -3526,6 +3620,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi /* * Similarly, we need a mapping since a value expression can be a calculation and the value * will go into a scratch column. + * + * Value expressions include keys? YES. */ int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length]; String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length]; @@ -3565,18 +3661,24 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions); /* - * Small table information. + * Column mapping. */ - VectorColumnOutputMapping bigTableRetainedMapping = - new VectorColumnOutputMapping("Big Table Retained Mapping"); + VectorColumnOutputMapping bigTableRetainMapping = + new VectorColumnOutputMapping("Big Table Retain Mapping"); - VectorColumnOutputMapping bigTableOuterKeyMapping = - new VectorColumnOutputMapping("Big Table Outer Key Mapping"); + VectorColumnOutputMapping nonOuterSmallTableKeyMapping = + new VectorColumnOutputMapping("Non Outer Small Table Key Mapping"); + + VectorColumnOutputMapping outerSmallTableKeyMapping = + new VectorColumnOutputMapping("Outer Small Table Key Mapping"); + + VectorColumnSourceMapping fullOuterSmallTableKeyMapping = + new VectorColumnSourceMapping("Full Outer Small Table Key Mapping"); // The order of the fields in the LazyBinary small table value must be used, so // we use the source ordering flavor for the mapping. - VectorColumnSourceMapping smallTableMapping = - new VectorColumnSourceMapping("Small Table Mapping"); + VectorColumnSourceMapping smallTableValueMapping = + new VectorColumnSourceMapping("Small Table Value Mapping"); Byte[] order = desc.getTagOrder(); Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); @@ -3586,7 +3688,6 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi * Gather up big and small table output result information from the MapJoinDesc.
*/ List bigTableRetainList = desc.getRetainList().get(posBigTable); - int bigTableRetainSize = bigTableRetainList.size(); int[] smallTableIndices; int smallTableIndicesSize; @@ -3623,6 +3724,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); + + final int bigTableRetainSize = bigTableRetainList.size(); for (int i = 0; i < bigTableRetainSize; i++) { // Since bigTableValueExpressions may do a calculation and produce a scratch column, we @@ -3636,9 +3739,10 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo); // Collect columns we copy from the big table batch to the overflow batch. - if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) { + if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) { + // Tolerate repeated use of a big table column. - bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); + bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); } nextOutputColumn++; @@ -3655,10 +3759,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi nextOutputColumn = firstSmallTableOutputColumn; // Small table indices has more information (i.e. keys) than retain, so use it if it exists... - String[] bigTableRetainedNames; if (smallTableIndicesSize > 0) { smallTableOutputCount = smallTableIndicesSize; - bigTableRetainedNames = new String[smallTableOutputCount]; for (int i = 0; i < smallTableIndicesSize; i++) { if (smallTableIndices[i] >= 0) { @@ -3670,8 +3772,7 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we // need to map the right column. - int batchKeyColumn = bigTableKeyColumnMap[keyIndex]; - bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex]; + int bigTableKeyColumn = bigTableKeyColumnMap[keyIndex]; TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex]; if (!isOuterJoin) { @@ -3679,25 +3780,30 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // Optimize inner join keys of small table results. // Project the big table key into the small table result "area". - projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo); + projectionMapping.add(nextOutputColumn, bigTableKeyColumn, typeInfo); - if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) { - // If necessary, copy the big table key into the overflow batch's small table - // result "area". - bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo); + if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumn)) { + + // When the Big Table key is not retained in the output result, we still need to copy + // the Big Table key into the overflow batch so its projection to + // the Small Table key will work properly... + // + nonOuterSmallTableKeyMapping.add(bigTableKeyColumn, bigTableKeyColumn, typeInfo); } } else { - // For outer joins, since the small table key can be null when there is no match, + // For outer joins, since the small table key can be null when there is a NOMATCH, + // we must have a physical (scratch) column for those keys. We cannot use the - // projection optimization used by inner joins above. + // projection optimization used by non-FULL OUTER joins above.
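To make the comment above concrete, here is a minimal sketch, using plain Java arrays rather than Hive's VectorizedRowBatch, of why an outer join's small-table key needs its own scratch column while the big-table key column stays untouched:

```java
import java.util.Arrays;

// For an inner join every output row has a match, so the small-table key can
// simply alias (project) the big-table key column. For an outer join, NOMATCH
// rows must show NULL for the small-table key, so the key value is copied into
// a separate scratch column that can be nulled per row.
public class OuterKeyScratchSketch {
  public static void main(String[] args) {
    long[] bigTableKeyColumn = { 10L, 20L, 30L };
    boolean[] matched = { true, false, true };   // row 1 has no small-table match

    Long[] smallTableKeyScratch = new Long[bigTableKeyColumn.length];
    for (int row = 0; row < bigTableKeyColumn.length; row++) {
      smallTableKeyScratch[row] = matched[row] ? bigTableKeyColumn[row] : null;
    }

    // Prints [10, null, 30]; the big-table key column itself is unchanged.
    System.out.println(Arrays.toString(smallTableKeyScratch));
  }
}
```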
int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo); + outerSmallTableKeyMapping.add(bigTableKeyColumn, scratchColumn, typeInfo); - bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo); + // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key + // into the output result. + fullOuterSmallTableKeyMapping.add(keyIndex, scratchColumn, typeInfo); } } else { @@ -3711,21 +3817,18 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi smallTableExprVectorizes = false; } - bigTableRetainedNames[i] = smallTableExprNode.toString(); - TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); // Make a new big table scratch column for the small table value. int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo); } nextOutputColumn++; } } else if (smallTableRetainSize > 0) { smallTableOutputCount = smallTableRetainSize; - bigTableRetainedNames = new String[smallTableOutputCount]; // Only small table values appear in join output result. @@ -3738,21 +3841,24 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi smallTableExprVectorizes = false; } - bigTableRetainedNames[i] = smallTableExprNode.toString(); - // Make a new big table scratch column for the small table value. TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo); nextOutputColumn++; } - } else { - bigTableRetainedNames = new String[0]; } + Map> filterExpressions = desc.getFilters(); + VectorExpression[] bigTableFilterExpressions = + vContext.getVectorExpressions( + filterExpressions.get(posBigTable), + VectorExpressionDescriptor.Mode.FILTER); + vectorMapJoinInfo.setBigTableFilterExpressions(bigTableFilterExpressions); + boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE); @@ -3808,15 +3914,23 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // Convert dynamic arrays and maps to simple arrays. 
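The mapping objects finalized below all follow a build-then-freeze pattern. A self-contained sketch of that pattern follows; ToyColumnMapping is a hypothetical stand-in for Hive's VectorColumnMapping and its finalize() method:

```java
import java.util.ArrayList;
import java.util.List;

// Mappings are grown with add() while the planner walks the join description,
// then frozen into plain parallel arrays for cheap access at run time.
public class ToyColumnMapping {
  private final List<Integer> outputColumns = new ArrayList<>();
  private final List<String> typeNames = new ArrayList<>();
  private int[] outputColumnArray;
  private String[] typeNameArray;

  public void add(int outputColumn, String typeName) {
    outputColumns.add(outputColumn);
    typeNames.add(typeName);
  }

  // Named finish() here to avoid shadowing Object.finalize() in a sketch.
  public void finish() {
    outputColumnArray = outputColumns.stream().mapToInt(Integer::intValue).toArray();
    typeNameArray = typeNames.toArray(new String[0]);
  }

  public int[] getOutputColumns() { return outputColumnArray; }
  public String[] getTypeNames() { return typeNameArray; }
}
```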
- bigTableRetainedMapping.finalize(); + bigTableRetainMapping.finalize(); + vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns()); + vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos()); + + nonOuterSmallTableKeyMapping.finalize(); + vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns()); + vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos()); + + outerSmallTableKeyMapping.finalize(); + fullOuterSmallTableKeyMapping.finalize(); - bigTableOuterKeyMapping.finalize(); + vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping); + vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping); - smallTableMapping.finalize(); + smallTableValueMapping.finalize(); - vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping); - vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping); - vectorMapJoinInfo.setSmallTableMapping(smallTableMapping); + vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping); projectionMapping.finalize(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index 689c888..ad6db21 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -459,6 +459,9 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeSparkProc MapJoinProcessor.convertJoinOpMapJoinOp(context.getConf(), joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), bigTablePosition, true); + if (mapJoinOp == null) { + return null; + } Operator parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index 5ca6b59..72e1aef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -48,6 +48,7 @@ private boolean vectorizationOnly = false; private VectorizationDetailLevel vectorizationDetailLevel = VectorizationDetailLevel.SUMMARY; private boolean locks = false; + private boolean debug = false; private Path explainRootPath; private Map opIdToRuntimeNumRows; @@ -138,6 +139,14 @@ public void setVectorizationDetailLevel(VectorizationDetailLevel vectorizationDe this.vectorizationDetailLevel = vectorizationDetailLevel; } + public boolean isDebug() { + return debug; + } + + public void setDebug(boolean debug) { + this.debug = debug; + } + public Path getExplainRootPath() { return explainRootPath; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 3aefb61..e3466de 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -114,6 +114,8 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } } else if (explainOptions == HiveParser.KW_LOCKS) { config.setLocks(true); + } else if (explainOptions == HiveParser.KW_DEBUG) { + config.setDebug(true); } else { // UNDONE: UNKNOWN OPTION? 
} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 6641e0d..0eaeafd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -189,6 +189,7 @@ KW_FILE: 'FILE'; KW_JAR: 'JAR'; KW_EXPLAIN: 'EXPLAIN'; KW_EXTENDED: 'EXTENDED'; +KW_DEBUG: 'DEBUG'; KW_FORMATTED: 'FORMATTED'; KW_DEPENDENCY: 'DEPENDENCY'; KW_LOGICAL: 'LOGICAL'; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 15d4edf..c87592f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -569,6 +569,7 @@ import org.apache.hadoop.hive.conf.HiveConf; xlateMap.put("KW_FUNCTION", "FUNCTION"); xlateMap.put("KW_EXPLAIN", "EXPLAIN"); xlateMap.put("KW_EXTENDED", "EXTENDED"); + xlateMap.put("KW_DEBUG", "DEBUG"); xlateMap.put("KW_SERDE", "SERDE"); xlateMap.put("KW_WITH", "WITH"); xlateMap.put("KW_SERDEPROPERTIES", "SERDEPROPERTIES"); @@ -802,6 +803,7 @@ explainOption | KW_REOPTIMIZATION | KW_LOCKS | (KW_VECTORIZATION vectorizationOnly? vectorizatonDetail?) + | KW_DEBUG ; vectorizationOnly diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 2566b31..ba626f1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -831,6 +831,7 @@ nonReserved | KW_OPERATOR | KW_EXPRESSION | KW_DETAIL + | KW_DEBUG | KW_WAIT | KW_ZONE | KW_TIMESTAMPTZ diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java index da30243..030bb61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java @@ -28,7 +28,7 @@ @Retention(RetentionPolicy.RUNTIME) public @interface Explain { public enum Level { - USER, DEFAULT, EXTENDED; + USER, DEFAULT, EXTENDED, DEBUG; public boolean in(Level[] levels) { for (Level level : levels) { if (level.equals(this)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index 3e62142..aeeadbf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -147,6 +147,10 @@ public VectorizationDetailLevel isVectorizationDetailLevel() { return config.getVectorizationDetailLevel(); } + public boolean isDebug() { + return config.isDebug(); + } + public ParseContext getParseContext() { return pCtx; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java index 3820d57..0eb03e9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java @@ -126,7 +126,7 @@ public String getJoinCondString() { sb.append("Inner Join "); break; case JoinDesc.FULL_OUTER_JOIN: - sb.append("Outer Join "); + sb.append("Full Outer Join "); break; case JoinDesc.LEFT_OUTER_JOIN: sb.append("Left Outer Join "); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java index b5ffcd9..9ecbc2f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java @@ -291,7 +291,7 @@ public void setExprs(final Map> exprs) { first = false; sb.append("{"); - sb.append(expr.getExprString()); + sb.append(expr == null ? "NULL" : expr.getExprString()); sb.append("}"); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index dc4f085..8ba5101 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -33,10 +33,16 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.optimizer.signature.Signature; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hive.common.util.ReflectionUtil; /** * Map Join operator Descriptor implementation. @@ -91,6 +97,7 @@ public MapJoinDesc(MapJoinDesc clone) { this.keys = clone.keys; this.keyTblDesc = clone.keyTblDesc; this.valueTblDescs = clone.valueTblDescs; + this.valueFilteredTblDescs = clone.valueFilteredTblDescs; this.posBigTable = clone.posBigTable; this.valueIndices = clone.valueIndices; this.retainList = clone.retainList; @@ -208,6 +215,16 @@ public void setDumpFilePrefix(String dumpFilePrefix) { this.dumpFilePrefix = dumpFilePrefix; } + // NOTE: Debugging only. + @Explain(displayName = "keyExpressions", explainLevels = { Level.DEBUG }) + public Map getKeyExpressionString() { + Map keyMap = new LinkedHashMap(); + for (Map.Entry> k: getKeys().entrySet()) { + keyMap.put(k.getKey(), k.getValue().toString()); + } + return keyMap; + } + /** * @return the keys in string form */ @@ -292,6 +309,60 @@ public void setValueFilteredTblDescs(List valueFilteredTblDescs) { return valueTblDescs; } + // NOTE: Debugging only. + @Explain(displayName = "keyContext", explainLevels = { Level.DEBUG }) + public String getDebugKeyContext() { + MapJoinObjectSerDeContext keyContext; + try { + AbstractSerDe keySerde = + (AbstractSerDe) ReflectionUtil.newInstance( + keyTblDesc.getDeserializerClass(), null); + SerDeUtils.initializeSerDe(keySerde, null, keyTblDesc.getProperties(), null); + keyContext = new MapJoinObjectSerDeContext(keySerde, false); + } catch (SerDeException e) { + return null; + } + return keyContext.stringify(); + } + + private boolean hasFilter(int alias, int[][] filterMaps) { + return filterMaps != null && filterMaps[alias] != null; + } + + // NOTE: Debugging only. + @Explain(displayName = "valueContexts", explainLevels = { Level.DEBUG }) + public String getDebugValueContext() { + List valueContextStringList = new ArrayList(); + try { + boolean noOuterJoin = getNoOuterJoin(); + // Order in which the results should be output. 
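The loop that follows visits the small tables in tag order, skips the big table position, and, because any outer join means noOuterJoin == false, reads the filtered value descriptors whose rows carry the trailing filterTag column. A toy sketch of both choices, with strings standing in for Hive TableDesc objects:

```java
import java.util.Arrays;
import java.util.List;

public class ValueDescChoiceSketch {
  public static void main(String[] args) {
    final boolean noOuterJoin = false;  // any FULL/LEFT/RIGHT OUTER join => false
    final int posBigTable = 0;          // hypothetical big table position

    List<String> valueTblDescs = Arrays.asList("big", "values(v1)");
    List<String> valueFilteredTblDescs = Arrays.asList("big", "values(v1,filterTag)");

    for (int pos = 0; pos < valueTblDescs.size(); pos++) {
      if (pos == posBigTable) {
        continue;  // big table rows are streamed, never loaded into the hash table
      }
      String chosen = noOuterJoin ? valueTblDescs.get(pos) : valueFilteredTblDescs.get(pos);
      System.out.println(pos + ":" + chosen);  // prints 1:values(v1,filterTag)
    }
  }
}
```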
+ Byte[] order = getTagOrder(); + int[][] filterMaps = getFilterMap(); + + for (int pos = 0; pos < order.length; pos++) { + if (pos == posBigTable) { + continue; + } + TableDesc valueTableDesc; + if (noOuterJoin) { + valueTableDesc = getValueTblDescs().get(pos); + } else { + valueTableDesc = getValueFilteredTblDescs().get(pos); + } + AbstractSerDe valueSerDe = + (AbstractSerDe) ReflectionUtil.newInstance( + valueTableDesc.getDeserializerClass(), null); + SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); + MapJoinObjectSerDeContext valueContext = + new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos, filterMaps)); + valueContextStringList.add(pos + ":" + valueContext.stringify()); + } + } catch (SerDeException e) { + return null; + } + return valueContextStringList.toString(); + } + /** * @param valueTblDescs * the valueTblDescs to set @@ -378,6 +449,8 @@ public boolean getGenJoinKeys() { return genJoinKeys; } + @Explain(displayName = "DynamicPartitionHashJoin", explainLevels = { Level.USER, Level.DEFAULT, + Level.EXTENDED }, displayOnlyOnTrue = true) public boolean isDynamicPartitionHashJoin() { return isDynamicPartitionHashJoin; } @@ -386,6 +459,25 @@ public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { this.isDynamicPartitionHashJoin = isDistributedHashJoin; } + // NOTE: Debugging only. + @Explain(displayName = "outer filter mappings", explainLevels = { Level.DEBUG }) + public String getDebugOuterFilterMapString() { + if (conds.length != 1) { + return null; + } + JoinCondDesc cond = conds[0]; + if (cond.getType() != JoinDesc.FULL_OUTER_JOIN && + cond.getType() != JoinDesc.LEFT_OUTER_JOIN && + cond.getType() != JoinDesc.RIGHT_OUTER_JOIN) { + return null; + } + int[][] fm = getFilterMap(); + if (fm == null) { + return null; + } + return Arrays.deepToString(fm); + } + // Use LinkedHashSet to give predictable display order. private static final Set vectorizableMapJoinNativeEngines = new LinkedHashSet(Arrays.asList("tez", "spark")); @@ -401,7 +493,9 @@ public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, VectorMapJoinDesc vectorMapJoinDesc) { // VectorMapJoinOperator is not native vectorized. 
- super(vectorMapJoinDesc, vectorMapJoinDesc.getHashTableImplementationType() != HashTableImplementationType.NONE); + super( + vectorMapJoinDesc, + vectorMapJoinDesc.getHashTableImplementationType() != HashTableImplementationType.NONE); this.mapJoinDesc = mapJoinDesc; this.vectorMapJoinDesc = vectorMapJoinDesc; vectorMapJoinInfo = @@ -414,7 +508,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, String engine = vectorMapJoinDesc.getEngine(); String engineInSupportedCondName = - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableMapJoinNativeEngines; + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + + engine + " IN " + vectorizableMapJoinNativeEngines; boolean engineInSupported = vectorizableMapJoinNativeEngines.contains(engine); boolean isFastHashTableEnabled = vectorMapJoinDesc.getIsFastHashTableEnabled(); @@ -469,7 +564,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return conditions; } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeConditionsMet() { if (nativeConditions == null) { nativeConditions = createNativeConditions(); @@ -477,7 +573,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return VectorizationCondition.getConditionsMet(nativeConditions); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeConditionsNotMet() { if (nativeConditions == null) { nativeConditions = createNativeConditions(); @@ -485,7 +582,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return VectorizationCondition.getConditionsNotMet(nativeConditions); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "bigTableKeyExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getBigTableKeyExpressions() { return vectorExpressionsToStringList( isNative ? 
@@ -493,8 +591,18 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, vectorMapJoinDesc.getAllBigTableKeyExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableKeyColumnNums() { + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "hashTableImplementationType", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String hashTableImplementationType() { + if (!isNative) { + return null; + } + return vectorMapJoinDesc.getHashTableImplementationType().name(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableKeyColumns() { if (!isNative) { return null; } @@ -502,10 +610,13 @@ public String getBigTableKeyColumnNums() { if (bigTableKeyColumnMap.length == 0) { return null; } - return Arrays.toString(bigTableKeyColumnMap); + return outputColumnsAndTypesToStringList( + vectorMapJoinInfo.getBigTableKeyColumnMap(), + vectorMapJoinInfo.getBigTableKeyTypeInfos()); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getBigTableValueExpressions() { return vectorExpressionsToStringList( isNative ? @@ -513,8 +624,18 @@ public String getBigTableKeyColumnNums() { vectorMapJoinDesc.getAllBigTableValueExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableValueColumnNums() { + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableFilterExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableFilterExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableFilterExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableValueColumns() { if (!isNative) { return null; } @@ -522,48 +643,78 @@ public String getBigTableValueColumnNums() { if (bigTableValueColumnMap.length == 0) { return null; } - return Arrays.toString(bigTableValueColumnMap); + return outputColumnsAndTypesToStringList( + vectorMapJoinInfo.getBigTableValueColumnMap(), + vectorMapJoinInfo.getBigTableValueTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getSmallTableColumns() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableValueMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getSmallTableColumns() { if (!isNative) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getSmallTableMapping()); + return outputColumnsAndTypesToStringList(vectorMapJoinInfo.getSmallTableValueMapping()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutputColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getProjectedOutputColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutput", + 
explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getProjectedOutputColumnNums() { if (!isNative) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getProjectionMapping()); + return outputColumnsAndTypesToStringList(vectorMapJoinInfo.getProjectionMapping()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableOuterKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List getBigTableOuterKey() { - if (!isNative || vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.OUTER) { + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainColumnNums", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableRetainedColumnNums() { + if (!isNative) { return null; } - return columnMappingToStringList(vectorMapJoinInfo.getBigTableOuterKeyMapping()); + return Arrays.toString(vectorMapJoinInfo.getBigTableRetainColumnMap()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainedColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableRetainedColumnNums() { - if (!isNative) { + @Explain(vectorization = Vectorization.DETAIL, displayName = "nonOuterSmallTableKeyMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getNonOuterSmallTableKeyMapping() { + if (!isNative || + (vectorMapJoinDesc.getVectorMapJoinVariation() == VectorMapJoinVariation.OUTER || + vectorMapJoinDesc.getVectorMapJoinVariation() == VectorMapJoinVariation.FULL_OUTER)) { + return null; + } + return Arrays.toString(vectorMapJoinInfo.getNonOuterSmallTableKeyColumnMap()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "outerSmallTableKeyMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getOuterSmallTableKeyMapping() { + if (!isNative || + vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.OUTER) { + return null; + } + return columnMappingToStringList(vectorMapJoinInfo.getOuterSmallTableKeyMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "fullOuterSmallTableKeyMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getFullOuterSmallTableKeyMapping() { + if (!isNative || + vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.FULL_OUTER) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getBigTableRetainedMapping()); + return columnMappingToStringList(vectorMapJoinInfo.getFullOuterSmallTableKeyMapping()); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeNotSupportedKeyTypes() { return vectorMapJoinDesc.getNotSupportedKeyTypes(); } } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public MapJoinOperatorExplainVectorization getMapJoinVectorization() { VectorMapJoinDesc vectorMapJoinDesc = (VectorMapJoinDesc) getVectorDesc(); if (vectorMapJoinDesc == null || this instanceof SMBJoinDesc) { @@ -587,7 +738,8 @@ public SMBJoinOperatorExplainVectorization(SMBJoinDesc 
smbJoinDesc, } // Handle dual nature. - @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public SMBJoinOperatorExplainVectorization getSMBJoinVectorization() { VectorSMBJoinDesc vectorSMBJoinDesc = (VectorSMBJoinDesc) getVectorDesc(); if (vectorSMBJoinDesc == null || !(this instanceof SMBJoinDesc)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java index 45f3347..dd907ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java @@ -29,7 +29,9 @@ import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.mapred.JobConf; public class MergeJoinWork extends BaseWork { @@ -180,4 +182,59 @@ public boolean getLlapMode() { public void addDummyOp(HashTableDummyOperator dummyOp) { getMainWork().addDummyOp(dummyOp); } + + /** + * For now, this class just says in EXPLAIN VECTORIZATION we don't support vectorization of the + * Merge Join vertex instead of being silent about it. + */ + public class MergeJoinExplainVectorization extends BaseExplainVectorization { + + private final MergeJoinWork mergeJoinWork; + + private VectorizationCondition[] mergeWorkVectorizationConditions; + + public MergeJoinExplainVectorization(MergeJoinWork mergeJoinWork) { + super(mergeJoinWork); + this.mergeJoinWork = mergeJoinWork; + } + + private VectorizationCondition[] createMergeWorkExplainVectorizationConditions() { + + boolean enabled = false; + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + "Vectorizing MergeJoin Supported") + }; + return conditions; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsMet() { + if (mergeWorkVectorizationConditions == null) { + mergeWorkVectorizationConditions = createMergeWorkExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsMet(mergeWorkVectorizationConditions); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsNotMet() { + if (mergeWorkVectorizationConditions == null) { + mergeWorkVectorizationConditions = createMergeWorkExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsNotMet(mergeWorkVectorizationConditions); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "MergeJoin Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MergeJoinExplainVectorization getReduceExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new MergeJoinExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java 
ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java
index 446b810..5439e14 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java
@@ -59,6 +59,25 @@ public String outputColumnsToStringList(VectorColumnMapping vectorColumnMapping) {
     return Arrays.toString(outputColumns);
   }
 
+  public List<String> outputColumnsAndTypesToStringList(int[] outputColumns, TypeInfo[] typeInfos) {
+    final int size = outputColumns.length;
+    ArrayList<String> result = new ArrayList<String>(size);
+    for (int i = 0; i < size; i++) {
+      result.add(outputColumns[i] + ":" + typeInfos[i].toString());
+    }
+    return result;
+  }
+
+  public List<String> outputColumnsAndTypesToStringList(VectorColumnMapping vectorColumnMapping) {
+    final int size = vectorColumnMapping.getCount();
+    if (size == 0) {
+      return null;
+    }
+    int[] outputColumns = vectorColumnMapping.getOutputColumns();
+    TypeInfo[] typeInfos = vectorColumnMapping.getTypeInfos();
+    return outputColumnsAndTypesToStringList(outputColumns, typeInfos);
+  }
+
   public List<String> columnMappingToStringList(VectorColumnMapping vectorColumnMapping) {
     final int size = vectorColumnMapping.getCount();
     if (size == 0) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
index 61216bc..b1d8e1f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.optimizer.signature.Signature;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
 import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType;
@@ -193,6 +194,17 @@ public Object clone() {
     return outputKeyColumnNames;
   }
 
+  // NOTE: Debugging only.
+  @Explain(displayName = "output key column names", explainLevels = { Level.DEBUG })
+  public List<String> getOutputKeyColumnNamesDisplay() {
+    List<String> result = new ArrayList<String>();
+    for (String name : outputKeyColumnNames) {
+      result.add(Utilities.ReduceField.KEY.name() + "." + name);
+    }
+    return result;
+  }
+
   public void setOutputKeyColumnNames(
       java.util.ArrayList<String> outputKeyColumnNames) {
     this.outputKeyColumnNames = outputKeyColumnNames;
@@ -202,6 +214,16 @@ public void setOutputKeyColumnNames(
     return outputValueColumnNames;
   }
 
+  // NOTE: Debugging only.
+  @Explain(displayName = "output value column names", explainLevels = { Level.DEBUG })
+  public List<String> getOutputValueColumnNamesDisplay() {
+    List<String> result = new ArrayList<String>();
+    for (String name : outputValueColumnNames) {
+      result.add(Utilities.ReduceField.VALUE.name() + "." + name);
+    }
+    return result;
+  }
+
   public void setOutputValueColumnNames(
       java.util.ArrayList<String> outputValueColumnNames) {
     this.outputValueColumnNames = outputValueColumnNames;
@@ -540,34 +562,41 @@ public ReduceSinkOperatorExplainVectorization(ReduceSinkDesc reduceSinkDesc,
     return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions());
   }
 
-  @Explain(vectorization = Vectorization.DETAIL, displayName = "keyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED })
-  public String getKeyColumnNums() {
+  @Explain(vectorization = Vectorization.DETAIL, displayName = "keyColumns",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public List<String> getKeyColumns() {
     if (!isNative) {
       return null;
     }
     int[] keyColumnMap = vectorReduceSinkInfo.getReduceSinkKeyColumnMap();
     if (keyColumnMap == null) {
       // Always show an array.
-      keyColumnMap = new int[0];
+      return new ArrayList<String>();
     }
-    return Arrays.toString(keyColumnMap);
+    return outputColumnsAndTypesToStringList(
+        vectorReduceSinkInfo.getReduceSinkKeyColumnMap(),
+        vectorReduceSinkInfo.getReduceSinkKeyTypeInfos());
   }
 
-  @Explain(vectorization = Vectorization.DETAIL, displayName = "valueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED })
-  public String getValueColumnNums() {
+  @Explain(vectorization = Vectorization.DETAIL, displayName = "valueColumns",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public List<String> getValueColumns() {
     if (!isNative) {
       return null;
     }
     int[] valueColumnMap = vectorReduceSinkInfo.getReduceSinkValueColumnMap();
     if (valueColumnMap == null) {
       // Always show an array.
-      valueColumnMap = new int[0];
+      return new ArrayList<String>();
     }
-    return Arrays.toString(valueColumnMap);
+    return outputColumnsAndTypesToStringList(
+        vectorReduceSinkInfo.getReduceSinkValueColumnMap(),
+        vectorReduceSinkInfo.getReduceSinkValueTypeInfos());
   }
 
-  @Explain(vectorization = Vectorization.DETAIL, displayName = "bucketColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED })
-  public String getBucketColumnNums() {
+  @Explain(vectorization = Vectorization.DETAIL, displayName = "bucketColumns",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public List<String> getBucketColumns() {
     if (!isNative) {
       return null;
     }
@@ -576,11 +605,14 @@ public String getBucketColumnNums() {
       // Suppress empty column map.
       return null;
     }
-    return Arrays.toString(bucketColumnMap);
+    return outputColumnsAndTypesToStringList(
+        vectorReduceSinkInfo.getReduceSinkBucketColumnMap(),
+        vectorReduceSinkInfo.getReduceSinkBucketTypeInfos());
   }
 
-  @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED })
-  public String getPartitionColumnNums() {
+  @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public List<String> getPartitionColumns() {
     if (!isNative) {
       return null;
     }
@@ -589,7 +621,9 @@ public String getPartitionColumnNums() {
       // Suppress empty column map.
       return null;
     }
-    return Arrays.toString(partitionColumnMap);
+    return outputColumnsAndTypesToStringList(
+        vectorReduceSinkInfo.getReduceSinkPartitionColumnMap(),
+        vectorReduceSinkInfo.getReduceSinkPartitionTypeInfos());
   }
 
   private VectorizationCondition[] createNativeConditions() {
@@ -598,7 +632,8 @@ public String getPartitionColumnNums() {
 
     String engine = vectorReduceSinkDesc.getEngine();
     String engineInSupportedCondName =
-        HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableReduceSinkNativeEngines;
+        HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " +
+            vectorizableReduceSinkNativeEngines;
     boolean engineInSupported = vectorizableReduceSinkNativeEngines.contains(engine);
 
     VectorizationCondition[] conditions = new VectorizationCondition[] {
@@ -633,7 +668,8 @@ public String getPartitionColumnNums() {
     return conditions;
   }
 
-  @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
   public List<String> getNativeConditionsMet() {
     if (nativeConditions == null) {
       nativeConditions = createNativeConditions();
@@ -641,7 +677,8 @@ public String getPartitionColumnNums() {
     return VectorizationCondition.getConditionsMet(nativeConditions);
   }
 
-  @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
   public List<String> getNativeConditionsNotMet() {
     if (nativeConditions == null) {
       nativeConditions = createNativeConditions();
@@ -650,7 +687,8 @@ public String getPartitionColumnNums() {
   }
 }
 
-  @Explain(vectorization = Vectorization.OPERATOR, displayName = "Reduce Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "Reduce Sink Vectorization",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
   public ReduceSinkOperatorExplainVectorization getReduceSinkVectorization() {
     VectorReduceSinkDesc vectorReduceSinkDesc = (VectorReduceSinkDesc) getVectorDesc();
     if (vectorReduceSinkDesc == null) {
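The two explain changes above swap bare column-number arrays for strings that pair each column with its type, and (at DEBUG level) prefix reduce-sink output names with the side they belong to. A minimal, self-contained sketch of the formatting being produced; the class and data below are illustrative only, not part of the patch:

import java.util.ArrayList;
import java.util.List;

public class ExplainFormatSketch {
  // Mirrors outputColumnsAndTypesToStringList: pairs each column number
  // with its type name, e.g. "4:bigint".
  static List<String> columnsAndTypes(int[] columns, String[] typeNames) {
    List<String> result = new ArrayList<String>(columns.length);
    for (int i = 0; i < columns.length; i++) {
      result.add(columns[i] + ":" + typeNames[i]);
    }
    return result;
  }

  public static void main(String[] args) {
    // keyColumns now renders as [4:bigint, 0:string] instead of the old [4, 0].
    System.out.println(columnsAndTypes(new int[] {4, 0}, new String[] {"bigint", "string"}));
    // Debug-level name display: KEY._col0 / VALUE._col1, etc.
    System.out.println("KEY." + "_col0" + " / " + "VALUE." + "_col1");
  }
}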
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
index 58032ca..a8f045c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java
@@ -85,11 +85,11 @@ public PrimitiveTypeInfo getPrimitiveTypeInfo() {
   }
 
   public static enum VectorMapJoinVariation {
-    NONE,
-    INNER_BIG_ONLY,
     INNER,
+    INNER_BIG_ONLY,
     LEFT_SEMI,
-    OUTER
+    OUTER,
+    FULL_OUTER
   }
 
   private HashTableImplementationType hashTableImplementationType;
@@ -107,7 +107,7 @@ public VectorMapJoinDesc() {
     hashTableImplementationType = HashTableImplementationType.NONE;
     hashTableKind = HashTableKind.NONE;
     hashTableKeyType = HashTableKeyType.NONE;
-    vectorMapJoinVariation = VectorMapJoinVariation.NONE;
+    vectorMapJoinVariation = null;
     minMaxEnabled = false;
 
     allBigTableKeyExpressions = null;
@@ -206,6 +206,7 @@ public VectorMapJoinInfo getVectorMapJoinInfo() {
   private List<String> notSupportedKeyTypes;
   private boolean smallTableExprVectorizes;
   private boolean outerJoinHasNoKeys;
+  boolean isSaveNullKeyValuesForFullOuter;
 
   public void setUseOptimizedTable(boolean useOptimizedTable) {
     this.useOptimizedTable = useOptimizedTable;
@@ -274,5 +275,10 @@ public void setIsHybridHashJoin(boolean isHybridHashJoin) {
   public boolean getIsHybridHashJoin() {
     return isHybridHashJoin;
   }
-
+  public void setIsSaveNullKeyValuesForFullOuter(boolean isSaveNullKeyValuesForFullOuter) {
+    this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter;
+  }
+  public boolean getIsSaveNullKeyValuesForFullOuter() {
+    return isSaveNullKeyValuesForFullOuter;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java
index 6db0540..ad82e5c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java
@@ -48,9 +48,19 @@
   private TypeInfo[] bigTableValueTypeInfos;
   private VectorExpression[] slimmedBigTableValueExpressions;
 
-  private VectorColumnOutputMapping bigTableRetainedMapping;
-  private VectorColumnOutputMapping bigTableOuterKeyMapping;
-  private VectorColumnSourceMapping smallTableMapping;
+  private VectorExpression[] bigTableFilterExpressions;
+
+  private int[] bigTableRetainColumnMap;
+  private TypeInfo[] bigTableRetainTypeInfos;
+
+  private int[] nonOuterSmallTableKeyColumnMap;
+  private TypeInfo[] nonOuterSmallTableKeyTypeInfos;
+
+  private VectorColumnOutputMapping outerSmallTableKeyMapping;
+
+  private VectorColumnSourceMapping fullOuterSmallTableKeyMapping;
+
+  private VectorColumnSourceMapping smallTableValueMapping;
 
   private VectorColumnSourceMapping projectionMapping;
 
@@ -65,9 +75,19 @@ public VectorMapJoinInfo() {
     bigTableValueTypeInfos = null;
     slimmedBigTableValueExpressions = null;
 
-    bigTableRetainedMapping = null;
-    bigTableOuterKeyMapping = null;
-    smallTableMapping = null;
+    bigTableFilterExpressions = null;
+
+    bigTableRetainColumnMap = null;
+    bigTableRetainTypeInfos = null;
+
+    nonOuterSmallTableKeyColumnMap = null;
+    nonOuterSmallTableKeyTypeInfos = null;
+
+    outerSmallTableKeyMapping = null;
+
+    fullOuterSmallTableKeyMapping = null;
+
+    smallTableValueMapping = null;
 
     projectionMapping = null;
   }
@@ -138,28 +158,69 @@ public void setSlimmedBigTableValueExpressions(
     this.slimmedBigTableValueExpressions = slimmedBigTableValueExpressions;
   }
 
-  public void setBigTableRetainedMapping(VectorColumnOutputMapping bigTableRetainedMapping) {
-    this.bigTableRetainedMapping = bigTableRetainedMapping;
+  public VectorExpression[] getBigTableFilterExpressions() {
+    return bigTableFilterExpressions;
+  }
+
+  public void setBigTableFilterExpressions(VectorExpression[] bigTableFilterExpressions) {
+    this.bigTableFilterExpressions = bigTableFilterExpressions;
+  }
+
+  public void setBigTableRetainColumnMap(int[] bigTableRetainColumnMap) {
+    this.bigTableRetainColumnMap = bigTableRetainColumnMap;
+  }
+
+  public int[] getBigTableRetainColumnMap() {
+    return bigTableRetainColumnMap;
+  }
+
+  public void setBigTableRetainTypeInfos(TypeInfo[] bigTableRetainTypeInfos) {
+    this.bigTableRetainTypeInfos = bigTableRetainTypeInfos;
+  }
+
+  public TypeInfo[] getBigTableRetainTypeInfos() {
+    return bigTableRetainTypeInfos;
+  }
+
+  public void setNonOuterSmallTableKeyColumnMap(int[] nonOuterSmallTableKeyColumnMap) {
+    this.nonOuterSmallTableKeyColumnMap = nonOuterSmallTableKeyColumnMap;
+  }
+
+  public int[] getNonOuterSmallTableKeyColumnMap() {
+    return nonOuterSmallTableKeyColumnMap;
+  }
+
+  public void setNonOuterSmallTableKeyTypeInfos(TypeInfo[] nonOuterSmallTableKeyTypeInfos) {
+    this.nonOuterSmallTableKeyTypeInfos = nonOuterSmallTableKeyTypeInfos;
+  }
+
+  public TypeInfo[] getNonOuterSmallTableKeyTypeInfos() {
+    return nonOuterSmallTableKeyTypeInfos;
+  }
+
+  public void setOuterSmallTableKeyMapping(VectorColumnOutputMapping outerSmallTableKeyMapping) {
+    this.outerSmallTableKeyMapping = outerSmallTableKeyMapping;
   }
 
-  public VectorColumnOutputMapping getBigTableRetainedMapping() {
-    return bigTableRetainedMapping;
+  public VectorColumnOutputMapping getOuterSmallTableKeyMapping() {
+    return outerSmallTableKeyMapping;
   }
 
-  public void setBigTableOuterKeyMapping(VectorColumnOutputMapping bigTableOuterKeyMapping) {
-    this.bigTableOuterKeyMapping = bigTableOuterKeyMapping;
+  public void setFullOuterSmallTableKeyMapping(
+      VectorColumnSourceMapping fullOuterSmallTableKeyMapping) {
+    this.fullOuterSmallTableKeyMapping = fullOuterSmallTableKeyMapping;
   }
 
-  public VectorColumnOutputMapping getBigTableOuterKeyMapping() {
-    return bigTableOuterKeyMapping;
+  public VectorColumnSourceMapping getFullOuterSmallTableKeyMapping() {
+    return fullOuterSmallTableKeyMapping;
   }
 
-  public void setSmallTableMapping(VectorColumnSourceMapping smallTableMapping) {
-    this.smallTableMapping = smallTableMapping;
+  public void setSmallTableValueMapping(VectorColumnSourceMapping smallTableValueMapping) {
+    this.smallTableValueMapping = smallTableValueMapping;
   }
 
-  public VectorColumnSourceMapping getSmallTableMapping() {
-    return smallTableMapping;
+  public VectorColumnSourceMapping getSmallTableValueMapping() {
+    return smallTableValueMapping;
   }
 
   public void setProjectionMapping(VectorColumnSourceMapping projectionMapping) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
index 9f785e6..e5c749f 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java
@@ -84,9 +84,9 @@ public void testGetNonExistent() throws Exception {
     map.put(kv2, -1);
     key[0] = (byte)(key[0] + 1);
     BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result();
-    map.getValueResult(key, 0, key.length, hashMapResult);
+    map.getValueResult(key, 0, key.length, hashMapResult, null);
     assertTrue(!hashMapResult.hasRows());
-    map.getValueResult(key, 0, 0, hashMapResult);
+    map.getValueResult(key, 0, 0, hashMapResult, null);
     assertTrue(!hashMapResult.hasRows());
   }
 
@@ -104,7 +104,7 @@ public void testPutWithFullMap() throws Exception {
     assertEquals(CAPACITY, map.getCapacity());
     // Get of non-existent key should terminate..
     BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result();
-    map.getValueResult(new byte[0], 0, 0, hashMapResult);
+    map.getValueResult(new byte[0], 0, 0, hashMapResult, null);
   }
 
   @Test
@@ -123,7 +123,7 @@ public void testExpand() throws Exception {
   private void verifyHashMapResult(BytesBytesMultiHashMap map, byte[] key, byte[]... values) {
     BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result();
-    byte state = map.getValueResult(key, 0, key.length, hashMapResult);
+    byte state = map.getValueResult(key, 0, key.length, hashMapResult, null);
     HashSet hs = new HashSet();
     int count = 0;
     if (hashMapResult.hasRows()) {
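Every getValueResult(...) call above gains a trailing argument, which these tests pass as null. My reading of the surrounding patch is that this slot carries a match tracker for FULL OUTER MapJoin: the hash map records which keys ever produced a match so the join can later emit NULL-extended rows for the small-table keys that never did, while null simply disables tracking. A hypothetical, self-contained sketch of that idea (this is not Hive's actual tracker class or API):

import java.util.BitSet;

// Conceptual match tracker: remember which hash-table slots ever matched
// so that, at operator close, FULL OUTER can NULL-extend the rest.
public class MatchTrackerSketch {
  private final BitSet matched;

  public MatchTrackerSketch(int slotCount) {
    matched = new BitSet(slotCount);
  }

  public void trackMatch(int slot) {
    matched.set(slot);
  }

  public boolean wasMatched(int slot) {
    return matched.get(slot);
  }

  public static void main(String[] args) {
    MatchTrackerSketch tracker = new MatchTrackerSketch(8);
    tracker.trackMatch(3);                     // a probe hit slot 3
    System.out.println(tracker.wasMatched(3)); // true
    System.out.println(tracker.wasMatched(5)); // false -> NULL-extend at close
  }
}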
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java
index 6491d79..244208b 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java
@@ -26,8 +26,22 @@
 
   private static final long serialVersionUID = 1L;
 
+  private boolean isClosed;
+  private boolean isAborted;
+
   public CollectorTestOperator() {
     super();
+
+    isClosed = false;
+    isAborted = false;
+  }
+
+  public boolean getIsClosed() {
+    return isClosed;
+  }
+
+  public boolean getIsAborted() {
+    return isAborted;
   }
 
   @Override
@@ -36,6 +50,14 @@ public void process(Object row, int tag) throws HiveException {
   }
 
   @Override
+  public void closeOp(boolean abort) {
+    isClosed = true;
+    if (abort) {
+      isAborted = true;
+    }
+  }
+
+  @Override
   public String getName() {
     return CollectorTestOperator.class.getSimpleName();
   }
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java
index 18933d4..ce90a6d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.hive.ql.exec.util.collectoroperator;
 
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -28,19 +31,30 @@
   private static final long serialVersionUID = 1L;
 
   private final ObjectInspector[] outputObjectInspectors;
+  private final int columnSize;
 
   public RowCollectorTestOperator(ObjectInspector[] outputObjectInspectors) {
     super();
     this.outputObjectInspectors = outputObjectInspectors;
+    columnSize = outputObjectInspectors.length;
   }
 
   @Override
   public void process(Object row, int tag) throws HiveException {
     rowCount++;
-    Object[] rowObjectArray = (Object[]) row;
-    Object[] resultObjectArray = new Object[rowObjectArray.length];
-    for (int c = 0; c < rowObjectArray.length; c++) {
-      resultObjectArray[c] = ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectArray[c]);
+    Object[] resultObjectArray = new Object[columnSize];
+    if (row instanceof ArrayList) {
+      List rowObjectList = (ArrayList) row;
+      for (int c = 0; c < columnSize; c++) {
+        resultObjectArray[c] =
+            ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectList.get(c));
+      }
+    } else {
+      Object[] rowObjectArray = (Object[]) row;
+      for (int c = 0; c < columnSize; c++) {
+        resultObjectArray[c] =
+            ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectArray[c]);
+      }
     }
     nextTestRow(new RowTestObjects(resultObjectArray));
   }
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java
index 06cd1e9..a2f9f04 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java
@@ -39,6 +39,16 @@ public RowVectorCollectorTestOperator(TypeInfo[] outputTypeInfos,
     vectorExtractRow.init(outputTypeInfos);
   }
 
+  public RowVectorCollectorTestOperator(
+      int[] outputProjectionColumnNums,
+      TypeInfo[] outputTypeInfos,
+      ObjectInspector[] outputObjectInspectors) throws HiveException {
+    super();
+    this.outputObjectInspectors = outputObjectInspectors;
+    vectorExtractRow = new VectorExtractRow();
+    vectorExtractRow.init(outputTypeInfos, outputProjectionColumnNums);
+  }
+
   @Override
   public void process(Object row, int tag) throws HiveException {
     VectorizedRowBatch batch = (VectorizedRowBatch) row;
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java
index 51a5f8e..ec53a3d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java
@@ -26,55 +26,129 @@ import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects;
 
 public class RowTestObjectsMultiSet {
-  private SortedMap<RowTestObjects, Integer> sortedMap;
-  private int rowCount;
-  private int totalCount;
+
+  public static enum RowFlag {
+    NONE (0),
+    REGULAR (0x01),
+    LEFT_OUTER (0x02),
+    FULL_OUTER (0x04);
+
+    public final long value;
+    RowFlag(long value) {
+      this.value = value;
+    }
+  }
+
+  private static class Value {
+
+    // Mutable.
+    public int count;
+    public long rowFlags;
+
+    public final int initialKeyCount;
+    public final int initialValueCount;
+    public final RowFlag initialRowFlag;
+
+    public Value(int count, RowFlag rowFlag, int totalKeyCount, int totalValueCount) {
+      this.count = count;
+      this.rowFlags = rowFlag.value;
+
+      initialKeyCount = totalKeyCount;
+      initialValueCount = totalValueCount;
+      initialRowFlag = rowFlag;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder sb = new StringBuilder();
+      sb.append("count ");
+      sb.append(count);
+      return sb.toString();
+    }
+  }
+
+  private SortedMap<RowTestObjects, Value> sortedMap;
+  private int totalKeyCount;
+  private int totalValueCount;
 
   public RowTestObjectsMultiSet() {
-    sortedMap = new TreeMap<RowTestObjects, Integer>();
-    rowCount = 0;
-    totalCount = 0;
+    sortedMap = new TreeMap<RowTestObjects, Value>();
+    totalKeyCount = 0;
+    totalValueCount = 0;
   }
 
-  public int getRowCount() {
-    return rowCount;
+  public int getTotalKeyCount() {
+    return totalKeyCount;
   }
 
-  public int getTotalCount() {
-    return totalCount;
+  public int getTotalValueCount() {
+    return totalValueCount;
   }
 
-  public void add(RowTestObjects testRow) {
+  public void add(RowTestObjects testRow, RowFlag rowFlag) {
     if (sortedMap.containsKey(testRow)) {
-      Integer count = sortedMap.get(testRow);
-      count++;
+      Value value = sortedMap.get(testRow);
+      value.count++;
+      value.rowFlags |= rowFlag.value;
+      totalValueCount++;
     } else {
-      sortedMap.put(testRow, 1);
-      rowCount++;
+      sortedMap.put(testRow, new Value(1, rowFlag, ++totalKeyCount, ++totalValueCount));
+    }
+
+  }
+
+  public void add(RowTestObjects testRow, int count) {
+    if (sortedMap.containsKey(testRow)) {
+      throw new RuntimeException();
+    }
+    sortedMap.put(testRow, new Value(count, RowFlag.NONE, ++totalKeyCount, ++totalValueCount));
+  }
+
+  public String displayRowFlags(long rowFlags) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("{");
+    for (RowFlag rowFlag : RowFlag.values()) {
+      if ((rowFlags & rowFlag.value) != 0) {
+        if (sb.length() > 1) {
+          sb.append(", ");
+        }
+        sb.append(rowFlag.name());
+      }
     }
-    totalCount++;
+    sb.append("}");
+    return sb.toString();
   }
 
-  public boolean verify(RowTestObjectsMultiSet other) {
+  public boolean verify(RowTestObjectsMultiSet other, String left, String right) {
     final int thisSize = this.sortedMap.size();
     final int otherSize = other.sortedMap.size();
     if (thisSize != otherSize) {
-      System.out.println("*VERIFY* count " + thisSize + " doesn't match otherSize " + otherSize);
+      System.out.println("*BENCHMARK* " + left + " count " + thisSize + " doesn't match " + right + " " + otherSize);
       return false;
     }
-    Iterator<Entry<RowTestObjects, Integer>> thisIterator = this.sortedMap.entrySet().iterator();
-    Iterator<Entry<RowTestObjects, Integer>> otherIterator = other.sortedMap.entrySet().iterator();
+    Iterator<Entry<RowTestObjects, Value>> thisIterator = this.sortedMap.entrySet().iterator();
+    Iterator<Entry<RowTestObjects, Value>> otherIterator = other.sortedMap.entrySet().iterator();
     for (int i = 0; i < thisSize; i++) {
-      Entry<RowTestObjects, Integer> thisEntry = thisIterator.next();
-      Entry<RowTestObjects, Integer> otherEntry = otherIterator.next();
+      Entry<RowTestObjects, Value> thisEntry = thisIterator.next();
+      Entry<RowTestObjects, Value> otherEntry = otherIterator.next();
       if (!thisEntry.getKey().equals(otherEntry.getKey())) {
-        System.out.println("*VERIFY* thisEntry.getKey() " + thisEntry.getKey() + " doesn't match otherEntry.getKey() " + otherEntry.getKey());
+        System.out.println("*BENCHMARK* " + left + " row " + thisEntry.getKey().toString() +
+            " (rowFlags " + displayRowFlags(thisEntry.getValue().rowFlags) +
+            " count " + thisEntry.getValue().count + ")" +
+            " but found " + right + " row " + otherEntry.getKey().toString() +
+            " (initialKeyCount " + otherEntry.getValue().initialKeyCount +
+            " initialValueCount " + otherEntry.getValue().initialValueCount + ")");
         return false;
       }
       // Check multi-set count.
-      if (!thisEntry.getValue().equals(otherEntry.getValue())) {
-        System.out.println("*VERIFY* key " + thisEntry.getKey() + " count " + thisEntry.getValue() + " doesn't match " + otherEntry.getValue());
+      if (thisEntry.getValue().count != otherEntry.getValue().count) {
+        System.out.println("*BENCHMARK* " + left + " row " + thisEntry.getKey().toString() +
+            " count " + thisEntry.getValue().count +
+            " (rowFlags " + displayRowFlags(thisEntry.getValue().rowFlags) + ")" +
+            " doesn't match " + right + " row count " + otherEntry.getValue().count +
+            " (initialKeyCount " + otherEntry.getValue().initialKeyCount +
+            " initialValueCount " + otherEntry.getValue().initialValueCount + ")");
         return false;
       }
     }
@@ -84,6 +158,51 @@ public boolean verify(RowTestObjectsMultiSet other) {
     return true;
   }
 
+  public RowTestObjectsMultiSet subtract(RowTestObjectsMultiSet other) {
+    RowTestObjectsMultiSet result = new RowTestObjectsMultiSet();
+
+    Iterator<Entry<RowTestObjects, Value>> thisIterator = this.sortedMap.entrySet().iterator();
+    while (thisIterator.hasNext()) {
+      Entry<RowTestObjects, Value> thisEntry = thisIterator.next();
+
+      if (other.sortedMap.containsKey(thisEntry.getKey())) {
+        Value thisValue = thisEntry.getValue();
+        Value otherValue = other.sortedMap.get(thisEntry.getKey());
+        if (thisValue.count == otherValue.count) {
+          continue;
+        }
+      }
+      result.add(thisEntry.getKey(), thisEntry.getValue().count);
+    }
+
+    return result;
+  }
+
+  public void displayDifferences(RowTestObjectsMultiSet other, String left, String right) {
+
+    RowTestObjectsMultiSet leftOnly = this.subtract(other);
+    Iterator<Entry<RowTestObjects, Value>> leftOnlyIterator =
+        leftOnly.sortedMap.entrySet().iterator();
+    while (leftOnlyIterator.hasNext()) {
+      Entry<RowTestObjects, Value> leftOnlyEntry = leftOnlyIterator.next();
+      System.out.println(
+          "*BENCHMARK* " + left + " only row " + leftOnlyEntry.getKey().toString() +
+          " count " + leftOnlyEntry.getValue().count +
+          " (initialRowFlag " + leftOnlyEntry.getValue().initialRowFlag.name() + ")");
+    }
+
+    RowTestObjectsMultiSet rightOnly = other.subtract(this);
+    Iterator<Entry<RowTestObjects, Value>> rightOnlyIterator =
+        rightOnly.sortedMap.entrySet().iterator();
+    while (rightOnlyIterator.hasNext()) {
+      Entry<RowTestObjects, Value> rightOnlyEntry = rightOnlyIterator.next();
+      System.out.println(
+          "*BENCHMARK* " + right + " only row " + rightOnlyEntry.getKey().toString() +
+          " count " + rightOnlyEntry.getValue().count +
+          " (initialRowFlag " + rightOnlyEntry.getValue().initialRowFlag.name() + ")");
+    }
+  }
+
   @Override
   public String toString() {
     return sortedMap.toString();
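RowTestObjectsMultiSet now tags each row with OR-ed RowFlag bits recording every way it was produced (regular match, LEFT OUTER NULL-extension, FULL OUTER NULL-extension). A small runnable sketch of the flag accumulation and the displayRowFlags-style rendering, standalone rather than the test class itself:

public class RowFlagSketch {
  enum RowFlag {
    NONE(0), REGULAR(0x01), LEFT_OUTER(0x02), FULL_OUTER(0x04);
    final long value;
    RowFlag(long value) { this.value = value; }
  }

  public static void main(String[] args) {
    long rowFlags = RowFlag.NONE.value;
    rowFlags |= RowFlag.REGULAR.value;     // row seen as a regular match
    rowFlags |= RowFlag.FULL_OUTER.value;  // same row also seen NULL-extended
    StringBuilder sb = new StringBuilder("{");
    for (RowFlag flag : RowFlag.values()) {
      // NONE has value 0, so it never renders -- same as the patch's logic.
      if ((rowFlags & flag.value) != 0) {
        if (sb.length() > 1) {
          sb.append(", ");
        }
        sb.append(flag.name());
      }
    }
    System.out.println(sb.append("}"));  // {REGULAR, FULL_OUTER}
  }
}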
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
index 6fd8e09..4ebf3ee 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
@@ -35,8 +35,6 @@
 
   private static int TEST_COUNT = 5000;
 
-  private static int fake = 0;
-
   @Test
   public void testDouble() throws Exception {
 
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
index 2d0c783..6ce63a4 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
@@ -62,9 +62,8 @@ public ValidatorVectorSelectOperator(CompilationOpContext ctx, OperatorDesc conf
      * Override forward to do validation
      */
     @Override
-    public void forward(Object row, ObjectInspector rowInspector, boolean isVectorized)
+    public void vectorForward(VectorizedRowBatch vrg)
         throws HiveException {
-      VectorizedRowBatch vrg = (VectorizedRowBatch) row;
 
       int[] projections = vrg.projectedColumns;
       assertEquals(2, vrg.projectionSize);
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
index b84273a..a2febe4 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -170,15 +170,18 @@ public boolean getAddPadding() {
     private final GenerationKind generationKind;
     private final TypeInfo typeInfo;
+    private final boolean columnAllowNulls;
     private final TypeInfo sourceTypeInfo;
     private final StringGenerationOption stringGenerationOption;
     private final List valueList;
 
-    private GenerationSpec(GenerationKind generationKind, TypeInfo typeInfo,
+    private GenerationSpec(
+        GenerationKind generationKind, TypeInfo typeInfo, boolean columnAllowNulls,
         TypeInfo sourceTypeInfo, StringGenerationOption stringGenerationOption,
         List valueList) {
       this.generationKind = generationKind;
       this.typeInfo = typeInfo;
+      this.columnAllowNulls = columnAllowNulls;
       this.sourceTypeInfo = sourceTypeInfo;
       this.stringGenerationOption = stringGenerationOption;
       this.valueList = valueList;
@@ -192,6 +195,10 @@ public TypeInfo getTypeInfo() {
       return typeInfo;
     }
 
+    public boolean getColumnAllowNulls() {
+      return columnAllowNulls;
+    }
+
     public TypeInfo getSourceTypeInfo() {
       return sourceTypeInfo;
     }
@@ -206,34 +213,46 @@ public StringGenerationOption getStringGenerationOption() {
 
     public static GenerationSpec createSameType(TypeInfo typeInfo) {
       return new GenerationSpec(
-          GenerationKind.SAME_TYPE, typeInfo, null, null, null);
+          GenerationKind.SAME_TYPE, typeInfo, true,
+          null, null, null);
+    }
+
+    public static GenerationSpec createSameType(TypeInfo typeInfo, boolean allowNulls) {
+      return new GenerationSpec(
+          GenerationKind.SAME_TYPE, typeInfo, allowNulls,
+          null, null, null);
     }
 
     public static GenerationSpec createOmitGeneration(TypeInfo typeInfo) {
       return new GenerationSpec(
-          GenerationKind.OMIT_GENERATION, typeInfo, null, null, null);
+          GenerationKind.OMIT_GENERATION, typeInfo, true,
+          null, null, null);
     }
 
     public static GenerationSpec createStringFamily(TypeInfo typeInfo,
         StringGenerationOption stringGenerationOption) {
       return new GenerationSpec(
-          GenerationKind.STRING_FAMILY, typeInfo, null, stringGenerationOption, null);
+          GenerationKind.STRING_FAMILY, typeInfo, true,
+          null, stringGenerationOption, null);
     }
 
     public static GenerationSpec createStringFamilyOtherTypeValue(TypeInfo typeInfo,
         TypeInfo otherTypeTypeInfo) {
       return new GenerationSpec(
-          GenerationKind.STRING_FAMILY_OTHER_TYPE_VALUE, typeInfo, otherTypeTypeInfo, null, null);
+          GenerationKind.STRING_FAMILY_OTHER_TYPE_VALUE, typeInfo, true,
+          otherTypeTypeInfo, null, null);
     }
 
     public static GenerationSpec createTimestampMilliseconds(TypeInfo typeInfo) {
       return new GenerationSpec(
-          GenerationKind.TIMESTAMP_MILLISECONDS, typeInfo, null, null, null);
+          GenerationKind.TIMESTAMP_MILLISECONDS, typeInfo, true,
+          null, null, null);
     }
 
     public static GenerationSpec createValueList(TypeInfo typeInfo, List valueList) {
       return new GenerationSpec(
-          GenerationKind.VALUE_LIST, typeInfo, null, null, valueList);
+          GenerationKind.VALUE_LIST, typeInfo, true,
+          null, null, valueList);
     }
   }
 
@@ -620,7 +639,9 @@ private void chooseSchema(SupportedTypes supportedTypes, Set allowedType
     if (generationSpecList != null) {
       typeName = generationSpecList.get(c).getTypeInfo().getTypeName();
-      dataTypePhysicalVariation = explicitDataTypePhysicalVariationList.get(c);
+      dataTypePhysicalVariation =
+          explicitDataTypePhysicalVariationList != null ?
+              explicitDataTypePhysicalVariationList.get(c) : DataTypePhysicalVariation.NONE;
     } else if (onlyOne || allowedTypeNameSet != null) {
       typeName = getRandomTypeName(r, supportedTypes, allowedTypeNameSet);
     } else {
@@ -887,7 +908,7 @@ public static Object randomStringFamily(Random random, TypeInfo typeInfo,
     Object object;
     switch (generationKind) {
     case SAME_TYPE:
-      object = randomWritable(c);
+      object = randomWritable(c, generationSpec.getColumnAllowNulls());
       break;
     case OMIT_GENERATION:
       object = null;
@@ -1454,6 +1475,12 @@ public static Object getNonWritableObject(Object object, TypeInfo typeInfo,
     }
   }
 
+  public Object randomWritable(int column, boolean columnAllowNull) {
+    return randomWritable(
+        r, typeInfos[column], objectInspectorList.get(column), dataTypePhysicalVariations[column],
+        columnAllowNull && allowNull);
+  }
+
   public Object randomWritable(int column) {
     return randomWritable(
         r, typeInfos[column], objectInspectorList.get(column), dataTypePhysicalVariations[column],
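VectorRandomRowSource thus gains a per-column columnAllowNulls gate on top of the row source's overall allowNull switch: a value may be NULL only when both permit it, which lets FULL OUTER tests force non-NULL join keys while still generating NULLs elsewhere. A tiny sketch of the combined gate; the generator below is illustrative only, not the Hive test class:

import java.util.Random;

public class AllowNullsSketch {
  // A value may be NULL only when the source-wide switch AND the
  // per-column flag both allow it (mirrors columnAllowNull && allowNull).
  static Object randomValue(Random r, boolean sourceAllowNull, boolean columnAllowNulls) {
    boolean allowNull = sourceAllowNull && columnAllowNulls;
    if (allowNull && r.nextInt(5) == 0) {
      return null;  // ~20% NULLs, only when permitted
    }
    return r.nextLong();
  }

  public static void main(String[] args) {
    Random r = new Random(12345);
    // A FULL OUTER join key column generated with NULLs disabled:
    System.out.println(randomValue(r, true, false));  // never null
    // A value column where NULLs remain possible:
    System.out.println(randomValue(r, true, true));
  }
}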
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java
index 0514e3f..63f673f 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java
@@ -26,10 +26,14 @@
 import java.util.Map;
 import java.util.Map.Entry;
 
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
@@ -37,31 +41,42 @@
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
 import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator;
 import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator;
+import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator;
 import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase;
+import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator;
 import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects;
+import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet;
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
+import org.apache.hadoop.hive.ql.plan.VectorSelectDesc;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -90,45 +105,204 @@
     NATIVE_VECTOR_FAST
   }
 
+  public static boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) {
+    return
+        (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP &&
+         mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED);
+  }
+
+  /*
+   * This test collector operator is for MapJoin row-mode.
+   */
+  public static class TestMultiSetCollectorOperator extends RowCollectorTestOperator {
+
+    private final RowTestObjectsMultiSet testRowMultiSet;
+
+    public TestMultiSetCollectorOperator(
+        ObjectInspector[] outputObjectInspectors,
+        RowTestObjectsMultiSet testRowMultiSet) {
+      super(outputObjectInspectors);
+      this.testRowMultiSet = testRowMultiSet;
+    }
+
+    public RowTestObjectsMultiSet getTestRowMultiSet() {
+      return testRowMultiSet;
+    }
+
+    public void nextTestRow(RowTestObjects testRow) {
+      testRowMultiSet.add(testRow, RowTestObjectsMultiSet.RowFlag.NONE);
+    }
+
+    @Override
+    public String getName() {
+      return TestMultiSetCollectorOperator.class.getSimpleName();
+    }
+  }
+
+  public static class TestMultiSetVectorCollectorOperator extends RowVectorCollectorTestOperator {
+
+    private final RowTestObjectsMultiSet testRowMultiSet;
+
+    public RowTestObjectsMultiSet getTestRowMultiSet() {
+      return testRowMultiSet;
+    }
+
+    public TestMultiSetVectorCollectorOperator(TypeInfo[] outputTypeInfos,
+        ObjectInspector[] outputObjectInspectors, RowTestObjectsMultiSet testRowMultiSet)
+            throws HiveException {
+      super(outputTypeInfos, outputObjectInspectors);
+      this.testRowMultiSet = testRowMultiSet;
+    }
+
+    public TestMultiSetVectorCollectorOperator(
+        int[] outputProjectionColumnNums,
+        TypeInfo[] outputTypeInfos,
+        ObjectInspector[] outputObjectInspectors,
+        RowTestObjectsMultiSet testRowMultiSet) throws HiveException {
+      super(outputProjectionColumnNums, outputTypeInfos, outputObjectInspectors);
+      this.testRowMultiSet = testRowMultiSet;
+    }
+
+    public void nextTestRow(RowTestObjects testRow) {
+      testRowMultiSet.add(testRow, RowTestObjectsMultiSet.RowFlag.NONE);
+    }
+
+    @Override
+    public String getName() {
+      return TestMultiSetVectorCollectorOperator.class.getSimpleName();
+    }
+  }
+
   public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) {
+    return createMapJoinDesc(testDesc, false);
+  }
+
+  public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc,
+      boolean isFullOuterIntersect) {
+
     MapJoinDesc mapJoinDesc = new MapJoinDesc();
+
     mapJoinDesc.setPosBigTable(0);
-    List<ExprNodeDesc> keyExpr = new ArrayList<ExprNodeDesc>();
+
+    List<ExprNodeDesc> bigTableKeyExpr = new ArrayList<ExprNodeDesc>();
     for (int i = 0; i < testDesc.bigTableKeyColumnNums.length; i++) {
-      keyExpr.add(new ExprNodeColumnDesc(testDesc.bigTableKeyTypeInfos[i], testDesc.bigTableKeyColumnNames[i], "B", false));
+      bigTableKeyExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.bigTableKeyTypeInfos[i],
+              testDesc.bigTableKeyColumnNames[i], "B", false));
     }
 
     Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
-    keyMap.put((byte)0, keyExpr);
+    keyMap.put((byte) 0, bigTableKeyExpr);
+
+    // Big Table expression includes all columns -- keys and extra (value) columns.
+    // UNDONE: Assumes all values retained...
+    List<ExprNodeDesc> bigTableExpr = new ArrayList<ExprNodeDesc>();
+    for (int i = 0; i < testDesc.bigTableColumnNames.length; i++) {
+      bigTableExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.bigTableTypeInfos[i],
+              testDesc.bigTableColumnNames[i], "B", false));
+    }
+    Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
+    exprMap.put((byte) 0, bigTableExpr);
+
+    List<ExprNodeDesc> smallTableKeyExpr = new ArrayList<ExprNodeDesc>();
+    for (int i = 0; i < testDesc.smallTableKeyTypeInfos.length; i++) {
+      ExprNodeColumnDesc exprNodeColumnDesc =
+          new ExprNodeColumnDesc(
+              testDesc.smallTableKeyTypeInfos[i],
+              testDesc.smallTableKeyColumnNames[i], "S", false);
+      smallTableKeyExpr.add(exprNodeColumnDesc);
+    }
+
+    // Retained Small Table keys and values.
     List<ExprNodeDesc> smallTableExpr = new ArrayList<ExprNodeDesc>();
-    for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) {
-      smallTableExpr.add(new ExprNodeColumnDesc(testDesc.smallTableValueTypeInfos[i], testDesc.smallTableValueColumnNames[i], "S", false));
+    final int smallTableRetainKeySize = testDesc.smallTableRetainKeyColumnNums.length;
+    for (int i = 0; i < smallTableRetainKeySize; i++) {
+      int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i];
+      smallTableExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.smallTableTypeInfos[smallTableKeyColumnNum],
+              testDesc.smallTableColumnNames[smallTableKeyColumnNum], "S", false));
     }
-    keyMap.put((byte)1, smallTableExpr);
+
+    final int smallTableRetainValueSize = testDesc.smallTableRetainValueColumnNums.length;
+    for (int i = 0; i < smallTableRetainValueSize; i++) {
+      int smallTableValueColumnNum =
+          smallTableRetainKeySize + testDesc.smallTableRetainValueColumnNums[i];
+      smallTableExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.smallTableTypeInfos[smallTableValueColumnNum],
+              testDesc.smallTableColumnNames[smallTableValueColumnNum], "S", false));
+    }
+
+    keyMap.put((byte) 1, smallTableKeyExpr);
+    exprMap.put((byte) 1, smallTableExpr);
+
     mapJoinDesc.setKeys(keyMap);
-    mapJoinDesc.setExprs(keyMap);
+    mapJoinDesc.setExprs(exprMap);
 
     Byte[] order = new Byte[] {(byte) 0, (byte) 1};
     mapJoinDesc.setTagOrder(order);
-    mapJoinDesc.setNoOuterJoin(testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER);
+    mapJoinDesc.setNoOuterJoin(
+        testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER &&
+        testDesc.vectorMapJoinVariation != VectorMapJoinVariation.FULL_OUTER);
 
     Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
     filterMap.put((byte) 0, new ArrayList<ExprNodeDesc>());  // None.
     mapJoinDesc.setFilters(filterMap);
 
     List<Integer> bigTableRetainColumnNumsList = intArrayToList(testDesc.bigTableRetainColumnNums);
-
-    // For now, just small table values...
-    List<Integer> smallTableRetainColumnNumsList = intArrayToList(testDesc.smallTableRetainValueColumnNums);
-
     Map<Byte, List<Integer>> retainListMap = new HashMap<Byte, List<Integer>>();
     retainListMap.put((byte) 0, bigTableRetainColumnNumsList);
-    retainListMap.put((byte) 1, smallTableRetainColumnNumsList);
+
+    // For now, just small table keys/values...
+    if (testDesc.smallTableRetainKeyColumnNums.length == 0) {
+
+      // Just the value columns numbers with retain.
+      List<Integer> smallTableValueRetainColumnNumsList =
+          intArrayToList(testDesc.smallTableRetainValueColumnNums);
+
+      retainListMap.put((byte) 1, smallTableValueRetainColumnNumsList);
+    } else {
+
+      // Both the key/value columns numbers.
+
+      // Zero and above numbers indicate a big table key is needed for
+      // small table result "area".
+
+      // Negative numbers indicate a column to be (deserialize) read from the small table's
+      // LazyBinary value row.
+
+      ArrayList<Integer> smallTableValueIndicesNumsList = new ArrayList<Integer>();
+      for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) {
+        smallTableValueIndicesNumsList.add(testDesc.smallTableRetainKeyColumnNums[i]);
+      }
+      for (int i = 0; i < testDesc.smallTableRetainValueColumnNums.length; i++) {
+        smallTableValueIndicesNumsList.add(-testDesc.smallTableRetainValueColumnNums[i] - 1);
+      }
+      int[] smallTableValueIndicesNums =
+          ArrayUtils.toPrimitive(smallTableValueIndicesNumsList.toArray(new Integer[0]));
+
+      Map<Byte, int[]> valueIndicesMap = new HashMap<Byte, int[]>();
+      valueIndicesMap.put((byte) 1, smallTableValueIndicesNums);
+      mapJoinDesc.setValueIndices(valueIndicesMap);
+    }
     mapJoinDesc.setRetainList(retainListMap);
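The comments above describe the valueIndices encoding: a non-negative entry points at a big-table key column to project into the small-table result "area", while a negative entry -(n + 1) means column n must be deserialized from the small table's LazyBinary value row. A runnable sketch of the encode/decode arithmetic (standalone; the array contents are illustrative):

public class ValueIndicesSketch {
  // index >= 0 -> copy that big-table key into the small-table result area;
  // index < 0  -> read column (-index - 1) from the small table's value row.
  static String decode(int encoded) {
    if (encoded >= 0) {
      return "big-table key #" + encoded;
    }
    return "small-table value column #" + (-encoded - 1);
  }

  public static void main(String[] args) {
    // Encoding retained key columns {0, 1} and retained value columns {0, 2},
    // built exactly as in the loops above: keys as-is, values as -(n + 1).
    int[] valueIndices = new int[] { 0, 1, -1, -3 };
    for (int encoded : valueIndices) {
      System.out.println(encoded + " -> " + decode(encoded));
    }
  }
}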
+
+    switch (testDesc.mapJoinPlanVariation) {
+    case DYNAMIC_PARTITION_HASH_JOIN:
+      // FULL OUTER which behaves differently for dynamic partition hash join.
+      mapJoinDesc.setDynamicPartitionHashJoin(true);
+      break;
+    default:
+      throw new RuntimeException(
+          "Unexpected map join plan variation " + testDesc.mapJoinPlanVariation);
+    }
+
     int joinDescType;
     switch (testDesc.vectorMapJoinVariation) {
     case INNER:
@@ -141,6 +315,9 @@ public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) {
     case OUTER:
       joinDescType = JoinDesc.LEFT_OUTER_JOIN;
       break;
+    case FULL_OUTER:
+      joinDescType = JoinDesc.FULL_OUTER_JOIN;
+      break;
     default:
       throw new RuntimeException("unknown operator variation " + testDesc.vectorMapJoinVariation);
     }
@@ -149,12 +326,25 @@ public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) {
     mapJoinDesc.setConds(conds);
 
     TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(testDesc.hiveConf, PlanUtils
-        .getFieldSchemasFromColumnList(keyExpr, ""));
+        .getFieldSchemasFromColumnList(smallTableKeyExpr, ""));
     mapJoinDesc.setKeyTblDesc(keyTableDesc);
+
+    // Small Table expression value columns.
+    List<ExprNodeDesc> smallTableValueExpr = new ArrayList<ExprNodeDesc>();
+
+    // All Small Table keys and values.
+    for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) {
+      smallTableValueExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.smallTableValueTypeInfos[i],
+              testDesc.smallTableValueColumnNames[i], "S", false));
+    }
+
     TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(
-        PlanUtils.getFieldSchemasFromColumnList(smallTableExpr, ""));
+        PlanUtils.getFieldSchemasFromColumnList(smallTableValueExpr, ""));
     ArrayList<TableDesc> valueTableDescsList = new ArrayList<TableDesc>();
+
+    // Big Table entry, then Small Table entry.
     valueTableDescsList.add(null);
     valueTableDescsList.add(valueTableDesc);
     mapJoinDesc.setValueTblDescs(valueTableDescsList);
@@ -180,6 +370,7 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t
       hashTableKind = HashTableKind.HASH_SET;
       break;
     case OUTER:
+    case FULL_OUTER:
      hashTableKind = HashTableKind.HASH_MAP;
      break;
     default:
@@ -190,9 +381,17 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t
     if (testDesc.bigTableKeyTypeInfos.length == 1) {
       switch (((PrimitiveTypeInfo) testDesc.bigTableKeyTypeInfos[0]).getPrimitiveCategory()) {
       case BOOLEAN:
+        hashTableKeyType = HashTableKeyType.BOOLEAN;
+        break;
       case BYTE:
+        hashTableKeyType = HashTableKeyType.BYTE;
+        break;
       case SHORT:
+        hashTableKeyType = HashTableKeyType.SHORT;
+        break;
       case INT:
+        hashTableKeyType = HashTableKeyType.INT;
+        break;
       case LONG:
         hashTableKeyType = HashTableKeyType.LONG;
         break;
@@ -216,49 +415,112 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t
 
     vectorDesc.setAllBigTableKeyExpressions(null);
 
-    vectorMapJoinInfo.setBigTableValueColumnMap(new int[0]);
-    vectorMapJoinInfo.setBigTableValueColumnNames(new String[0]);
-    vectorMapJoinInfo.setBigTableValueTypeInfos(new TypeInfo[0]);
+    vectorMapJoinInfo.setBigTableValueColumnMap(testDesc.bigTableColumnNums);
+    vectorMapJoinInfo.setBigTableValueColumnNames(testDesc.bigTableColumnNames);
+    vectorMapJoinInfo.setBigTableValueTypeInfos(testDesc.bigTableTypeInfos);
 
     vectorMapJoinInfo.setSlimmedBigTableValueExpressions(null);
 
     vectorDesc.setAllBigTableValueExpressions(null);
 
+    vectorMapJoinInfo.setBigTableFilterExpressions(new VectorExpression[0]);
+
+    /*
+     * Column mapping.
+     */
+    VectorColumnOutputMapping bigTableRetainMapping =
+        new VectorColumnOutputMapping("Big Table Retain Mapping");
+
+    VectorColumnOutputMapping nonOuterSmallTableKeyMapping =
+        new VectorColumnOutputMapping("Non Outer Small Table Key Mapping");
+
+    VectorColumnOutputMapping outerSmallTableKeyMapping =
+        new VectorColumnOutputMapping("Outer Small Table Key Mapping");
+
+    VectorColumnSourceMapping fullOuterSmallTableKeyMapping =
+        new VectorColumnSourceMapping("Full Outer Small Table Key Mapping");
+
     VectorColumnSourceMapping projectionMapping =
         new VectorColumnSourceMapping("Projection Mapping");
 
+    int nextOutputColumn = 0;
+
+    final int bigTableRetainedSize = testDesc.bigTableRetainColumnNums.length;
+    for (int i = 0; i < bigTableRetainedSize; i++) {
+      final int batchColumnIndex = testDesc.bigTableRetainColumnNums[i];
+      TypeInfo typeInfo = testDesc.bigTableTypeInfos[i];
+      projectionMapping.add(
+          nextOutputColumn, batchColumnIndex, typeInfo);
+
+      // Collect columns we copy from the big table batch to the overflow batch.
+      if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) {
-    VectorColumnOutputMapping bigTableRetainedMapping =
-        new VectorColumnOutputMapping("Big Table Retained Mapping");
-    for (int i = 0; i < testDesc.bigTableTypeInfos.length; i++) {
-      bigTableRetainedMapping.add(i, i, testDesc.bigTableTypeInfos[i]);
-      projectionMapping.add(i, i, testDesc.bigTableKeyTypeInfos[i]);
+        // Tolerate repeated use of a big table column.
+        bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
+      }
+      nextOutputColumn++;
     }
 
-    VectorColumnOutputMapping bigTableOuterKeyMapping =
-        new VectorColumnOutputMapping("Big Table Outer Key Mapping");
+    boolean isOuterJoin =
+        (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER ||
+         testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER);
+
+    int emulateScratchColumn = testDesc.bigTableTypeInfos.length;
+
+    VectorColumnOutputMapping smallTableKeyOutputMapping =
+        new VectorColumnOutputMapping("Small Table Key Output Mapping");
+    final int smallTableKeyRetainSize = testDesc.smallTableRetainKeyColumnNums.length;
+    for (int i = 0; i < smallTableKeyRetainSize; i++) {
+      final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i];
+      final int bigTableKeyColumnNum = testDesc.bigTableKeyColumnNums[smallTableKeyColumnNum];
+      TypeInfo keyTypeInfo = testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum];
+      if (!isOuterJoin) {
+
+        // Project the big table key into the small table result "area".
+        projectionMapping.add(nextOutputColumn, bigTableKeyColumnNum, keyTypeInfo);
+        if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumnNum)) {
+          nonOuterSmallTableKeyMapping.add(bigTableKeyColumnNum, bigTableKeyColumnNum, keyTypeInfo);
+        }
+      } else {
+        outerSmallTableKeyMapping.add(bigTableKeyColumnNum, emulateScratchColumn, keyTypeInfo);
+        projectionMapping.add(nextOutputColumn, emulateScratchColumn, keyTypeInfo);
+
+        // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key
+        // into the output result.
+        fullOuterSmallTableKeyMapping.add(smallTableKeyColumnNum, emulateScratchColumn, keyTypeInfo);
+        emulateScratchColumn++;
+      }
+      nextOutputColumn++;
+    }
 
     // The order of the fields in the LazyBinary small table value must be used, so
     // we use the source ordering flavor for the mapping.
-    VectorColumnSourceMapping smallTableMapping =
-        new VectorColumnSourceMapping("Small Table Mapping");
-    int outputColumn = testDesc.bigTableTypeInfos.length;
+    VectorColumnSourceMapping smallTableValueMapping =
+        new VectorColumnSourceMapping("Small Table Value Mapping");
     for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
-      smallTableMapping.add(i, outputColumn, testDesc.smallTableValueTypeInfos[i]);
-      projectionMapping.add(outputColumn, outputColumn, testDesc.smallTableValueTypeInfos[i]);
-      outputColumn++;
+      smallTableValueMapping.add(i, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]);
+      projectionMapping.add(nextOutputColumn, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]);
+      emulateScratchColumn++;
+      nextOutputColumn++;
     }
 
     // Convert dynamic arrays and maps to simple arrays.
-    bigTableRetainedMapping.finalize();
+    bigTableRetainMapping.finalize();
+    vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns());
+    vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos());
 
-    bigTableOuterKeyMapping.finalize();
+    nonOuterSmallTableKeyMapping.finalize();
+    vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns());
+    vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos());
 
-    smallTableMapping.finalize();
+    outerSmallTableKeyMapping.finalize();
+    fullOuterSmallTableKeyMapping.finalize();
 
-    vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping);
-    vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping);
-    vectorMapJoinInfo.setSmallTableMapping(smallTableMapping);
+    vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping);
+    vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping);
+
+    smallTableValueMapping.finalize();
+
+    vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping);
 
     projectionMapping.finalize();
 
@@ -267,7 +529,9 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t
 
     vectorMapJoinInfo.setProjectionMapping(projectionMapping);
 
-    assert projectionMapping.getCount() == testDesc.outputColumnNames.length;
+    if (projectionMapping.getCount() != testDesc.outputColumnNames.length) {
+      throw new RuntimeException();
+    }
 
     vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
 
@@ -306,6 +570,11 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator(
           new VectorMapJoinOuterLongOperator(new CompilationOpContext(),
               mapJoinDesc, vContext, vectorDesc);
       break;
+    case FULL_OUTER:
+      operator =
+          new VectorMapJoinFullOuterLongOperator(new CompilationOpContext(),
+              mapJoinDesc, vContext, vectorDesc);
+      break;
     default:
       throw new RuntimeException("unknown operator variation " + VectorMapJoinVariation);
     }
@@ -331,6 +600,11 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator(
       operator =
           new VectorMapJoinOuterStringOperator(new CompilationOpContext(),
               mapJoinDesc, vContext, vectorDesc);
+      break;
+    case FULL_OUTER:
+      operator =
+          new VectorMapJoinFullOuterStringOperator(new CompilationOpContext(),
+              mapJoinDesc, vContext, vectorDesc);
       break;
     default:
       throw new RuntimeException("unknown operator variation " + VectorMapJoinVariation);
@@ -358,6 +631,11 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator(
           new VectorMapJoinOuterMultiKeyOperator(new CompilationOpContext(),
               mapJoinDesc, vContext, vectorDesc);
       break;
+    case FULL_OUTER:
+      operator =
+          new VectorMapJoinFullOuterMultiKeyOperator(new CompilationOpContext(),
+              mapJoinDesc, vContext, vectorDesc);
+      break;
     default:
       throw new RuntimeException("unknown operator variation " + VectorMapJoinVariation);
     }
@@ -365,16 +643,31 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator(
     default:
       throw new RuntimeException("Unknown hash table key type " + vectorDesc.getHashTableKeyType());
     }
+    System.out.println("*BENCHMARK* createNativeVectorMapJoinOperator " +
+        operator.getClass().getSimpleName());
     return operator;
   }
 
   public static VectorizationContext createVectorizationContext(MapJoinTestDescription testDesc)
       throws HiveException {
     VectorizationContext vContext =
-        new VectorizationContext("test", testDesc.bigTableColumnNamesList);
+        new VectorizationContext("test", testDesc.bigTableColumnNameList);
+
+    boolean isOuterJoin =
+        (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER ||
+         testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER);
+
+    if (isOuterJoin) {
+
+      // We need physical columns.
+      for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) {
+        final int smallTableKeyRetainColumnNum = testDesc.smallTableRetainKeyColumnNums[i];
+        vContext.allocateScratchColumn(testDesc.smallTableKeyTypeInfos[smallTableKeyRetainColumnNum]);
+      }
+    }
 
     // Create scratch columns to hold small table results.
-    for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
+    for (int i = 0; i < testDesc.smallTableRetainValueColumnNums.length; i++) {
      vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]);
     }
     return vContext;
@@ -390,19 +683,19 @@ public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoi
 
     final Byte smallTablePos = 1;
 
-    // UNDONE: Why do we need to specify BinarySortableSerDe explicitly here???
     TableDesc keyTableDesc = mapJoinDesc.getKeyTblDesc();
     AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance(
         BinarySortableSerDe.class, null);
     SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null);
     MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false);
 
-    TableDesc valueTableDesc;
+    final List<TableDesc> valueTableDescList;
     if (mapJoinDesc.getNoOuterJoin()) {
-      valueTableDesc = mapJoinDesc.getValueTblDescs().get(smallTablePos);
+      valueTableDescList = mapJoinDesc.getValueTblDescs();
     } else {
-      valueTableDesc = mapJoinDesc.getValueFilteredTblDescs().get(smallTablePos);
+      valueTableDescList = mapJoinDesc.getValueFilteredTblDescs();
     }
+    TableDesc valueTableDesc = valueTableDescList.get(smallTablePos);
     AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(
         valueTableDesc.getDeserializerClass(), null);
     SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
@@ -414,16 +707,19 @@ public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoi
   }
 
   public static void connectOperators(
-      MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> operator,
-      Operator<? extends OperatorDesc> testCollectorOperator) throws HiveException {
-    Operator<? extends OperatorDesc>[] parents = new Operator[] {operator};
-    testCollectorOperator.setParentOperators(Arrays.asList(parents));
-    Operator<? extends OperatorDesc>[] childOperators = new Operator[] {testCollectorOperator};
-    operator.setChildOperators(Arrays.asList(childOperators));
-    HiveConf.setBoolVar(testDesc.hiveConf,
-        HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);
-    operator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors);
+      Operator<? extends OperatorDesc> operator,
+      Operator<? extends OperatorDesc> childOperator) throws HiveException {
+
+    List<Operator<? extends OperatorDesc>> newParentOperators = newOperatorList();
+    newParentOperators.addAll(childOperator.getParentOperators());
+    newParentOperators.add(operator);
+    childOperator.setParentOperators(newParentOperators);
+
+    List<Operator<? extends OperatorDesc>> newChildOperators = newOperatorList();
+    newChildOperators.addAll(operator.getChildOperators());
+    newChildOperators.add(childOperator);
+    operator.setChildOperators(newChildOperators);
+  }
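The reworked connectOperators above no longer hard-wires a single collector or initializes the operator; it only appends to the existing parent/child lists, so a test pipeline can be chained edge by edge (for example MapJoin -> intercept Select -> collector). A self-contained analogue of that linking logic; Node is a stand-in here, not a Hive class:

import java.util.ArrayList;
import java.util.List;

public class ConnectSketch {
  static class Node {
    final String name;
    final List<Node> parents = new ArrayList<Node>();
    final List<Node> children = new ArrayList<Node>();
    Node(String name) { this.name = name; }
  }

  // Append to the existing lists rather than overwrite them, so chains
  // of any length can be built one edge at a time.
  static void connect(Node operator, Node child) {
    child.parents.add(operator);
    operator.children.add(child);
  }

  public static void main(String[] args) {
    Node mapJoin = new Node("MapJoin");
    Node select = new Node("InterceptSelect");
    Node collector = new Node("Collector");
    connect(mapJoin, select);
    connect(select, collector);
    System.out.println(mapJoin.children.get(0).name);   // InterceptSelect
    System.out.println(collector.parents.get(0).name);  // InterceptSelect
  }
}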
 
   private static List<Integer> intArrayToList(int[] intArray) {
@@ -509,9 +805,25 @@ private static void loadTableContainerData(MapJoinTestDescription testDesc, MapJ
     mapJoinTableContainer.seal();
   }
 
-  public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc,
-      Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData,
-      MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin)
+  public static class CreateMapJoinResult {
+    public final MapJoinOperator mapJoinOperator;
+    public final MapJoinTableContainer mapJoinTableContainer;
+    public final MapJoinTableContainerSerDe mapJoinTableContainerSerDe;
+
+    public CreateMapJoinResult(
+        MapJoinOperator mapJoinOperator,
+        MapJoinTableContainer mapJoinTableContainer,
+        MapJoinTableContainerSerDe mapJoinTableContainerSerDe) {
+      this.mapJoinOperator = mapJoinOperator;
+      this.mapJoinTableContainer = mapJoinTableContainer;
+      this.mapJoinTableContainerSerDe = mapJoinTableContainerSerDe;
+    }
+  }
+
+  public static CreateMapJoinResult createMapJoin(
+      MapJoinTestDescription testDesc,
+      MapJoinTestData testData,
+      MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin,
+      MapJoinTableContainer shareMapJoinTableContainer)
           throws SerDeException, IOException, HiveException {
 
     final Byte bigTablePos = 0;
@@ -539,11 +851,16 @@ public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc,
       operator = new MapJoinOperator(new CompilationOpContext());
       operator.setConf(mapJoinDesc);
     } else {
-      VectorizationContext vContext = new VectorizationContext("test", testDesc.bigTableColumnNamesList);
+      VectorizationContext vContext =
+          new VectorizationContext("test", testDesc.bigTableColumnNameList);
+
+      /*
+      // UNDONE: Unclear this belongs in the input VectorizationContext...
       // Create scratch columns to hold small table results.
       for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
         vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]);
       }
+      */
 
       // This is what the Vectorizer class does.
       VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
@@ -571,21 +888,20 @@ public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc,
       }
     }
 
-    MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
-
-    operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, mapJoinTableContainerSerDe);
+    HiveConf.setBoolVar(testDesc.hiveConf,
+        HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);
 
-    return operator;
+    return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe);
   }
 
-  public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc,
-      Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData,
-      MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType)
+  public static CreateMapJoinResult createNativeVectorMapJoin(
+      MapJoinTestDescription testDesc,
+      MapJoinTestData testData,
+      MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType,
+      MapJoinTableContainer shareMapJoinTableContainer)
           throws SerDeException, IOException, HiveException {
 
     VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
-
-    // UNDONE
     mapJoinDesc.setVectorDesc(vectorDesc);
 
     vectorDesc.setHashTableImplementationType(hashTableImplementationType);
@@ -593,13 +909,14 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t
     VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
 
     MapJoinTableContainer mapJoinTableContainer;
+    MapJoinTableContainerSerDe mapJoinTableContainerSerDe = null;
     switch (vectorDesc.getHashTableImplementationType()) {
     case OPTIMIZED:
       mapJoinTableContainer =
           new MapJoinBytesTableContainer(
               testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
 
-      MapJoinTableContainerSerDe mapJoinTableContainerSerDe =
+      mapJoinTableContainerSerDe =
           MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
 
       mapJoinTableContainer.setSerde(
@@ -615,7 +932,11 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t
       throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType());
     }
 
-    loadTableContainerData(testDesc, testData, mapJoinTableContainer);
+//    if (shareMapJoinTableContainer == null) {
+      loadTableContainerData(testDesc, testData, mapJoinTableContainer);
+//    } else {
+//      setTableContainerData(mapJoinTableContainer, shareMapJoinTableContainer);
+//    }
 
     VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc);
 
@@ -636,56 +957,295 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t
             vectorDesc,
             vContext);
 
-    MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
+    HiveConf.setBoolVar(testDesc.hiveConf,
+        HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);
 
-    operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null);
+    return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe);
+  }
 
-    return operator;
+  public static CreateMapJoinResult createMapJoinImplementation(
+      MapJoinTestImplementation mapJoinImplementation,
+      MapJoinTestDescription testDesc,
+      MapJoinTestData testData,
+      MapJoinDesc mapJoinDesc)
+          throws SerDeException, IOException, HiveException {
+    return createMapJoinImplementation(
+        mapJoinImplementation, testDesc, testData, mapJoinDesc, null);
   }
 
-  public static MapJoinOperator createMapJoinImplementation(MapJoinTestImplementation mapJoinImplementation,
+  public static CreateMapJoinResult createMapJoinImplementation(
+      MapJoinTestImplementation mapJoinImplementation,
       MapJoinTestDescription testDesc,
-      Operator<? extends OperatorDesc> testCollectorOperator, MapJoinTestData testData,
-      MapJoinDesc mapJoinDesc) throws SerDeException, IOException, HiveException {
+      MapJoinTestData testData,
+      MapJoinDesc mapJoinDesc,
+      MapJoinTableContainer shareMapJoinTableContainer)
+          throws SerDeException, IOException, HiveException {
 
-    MapJoinOperator operator;
+    CreateMapJoinResult result;
     switch (mapJoinImplementation) {
     case ROW_MODE_HASH_MAP:
       // MapJoinOperator
-      operator = MapJoinTestConfig.createMapJoin(
-          testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ false,
-          /* isOriginalMapJoin */ true);
+      result = MapJoinTestConfig.createMapJoin(
+          testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ false,
+          /* isOriginalMapJoin */ true,
+          shareMapJoinTableContainer);
       break;
     case ROW_MODE_OPTIMIZED:
       // MapJoinOperator
-      operator = MapJoinTestConfig.createMapJoin(
-          testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ false,
-          /* isOriginalMapJoin */ false);
+      result = MapJoinTestConfig.createMapJoin(
+          testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ false,
+          /* isOriginalMapJoin */ false,
+          shareMapJoinTableContainer);
       break;
     case VECTOR_PASS_THROUGH:
       // VectorMapJoinOperator
-      operator = MapJoinTestConfig.createMapJoin(
-          testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ true,
-          /* n/a */ false);
+      result = MapJoinTestConfig.createMapJoin(
+          testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ true,
+          /* n/a */ false,
+          shareMapJoinTableContainer);
       break;
     case NATIVE_VECTOR_OPTIMIZED:
-      operator = MapJoinTestConfig.createNativeVectorMapJoin(
-          testDesc, testCollectorOperator, testData, mapJoinDesc, HashTableImplementationType.OPTIMIZED);
+      result = MapJoinTestConfig.createNativeVectorMapJoin(
+          testDesc, testData, mapJoinDesc,
+          HashTableImplementationType.OPTIMIZED,
+          shareMapJoinTableContainer);
       break;
     case NATIVE_VECTOR_FAST:
-      operator = MapJoinTestConfig.createNativeVectorMapJoin(
-          testDesc, testCollectorOperator, testData, mapJoinDesc, HashTableImplementationType.FAST);
+      result = MapJoinTestConfig.createNativeVectorMapJoin(
+          testDesc, testData, mapJoinDesc,
+          HashTableImplementationType.FAST,
+          shareMapJoinTableContainer);
       break;
     default:
       throw new RuntimeException("Unexpected MapJoin Operator Implementation " + mapJoinImplementation);
     }
 
-    return operator;
+    return result;
+  }
+
+  private static Operator<? extends OperatorDesc> makeInterceptSelectOperator(
+      MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize,
+      String[] outputColumnNames, TypeInfo[] outputTypeInfos) {
+
+    MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf();
+
+    List<ExprNodeDesc> selectExprList = new ArrayList<ExprNodeDesc>();
+    List<String> selectOutputColumnNameList = new ArrayList<String>();
+    for (int i = 0; i < bigTableRetainSize; i++) {
+      String selectOutputColumnName = "_col" + i;
+      selectOutputColumnNameList.add(selectOutputColumnName);
+
+      TypeInfo outputTypeInfo = outputTypeInfos[i];
+      if (i < bigTableKeySize) {
+
+        // Big Table key.
+        ExprNodeColumnDesc keyColumnExpr =
+            new ExprNodeColumnDesc(
+                outputTypeInfo,
+                outputColumnNames[i], "test", false);
+        selectExprList.add(keyColumnExpr);
+      } else {
+
+        // For row-mode, substitute NULL constant for any non-key extra Big Table columns.
+        ExprNodeConstantDesc nullExtraColumnExpr =
+            new ExprNodeConstantDesc(
+                outputTypeInfo,
+                null);
+        nullExtraColumnExpr.setFoldedFromCol(outputColumnNames[i]);
+        selectExprList.add(nullExtraColumnExpr);
+      }
+    }
+
+    SelectDesc selectDesc = new SelectDesc(selectExprList, selectOutputColumnNameList);
+    Operator<SelectDesc> selectOperator =
+        OperatorFactory.get(new CompilationOpContext(), selectDesc);
+
+    return selectOperator;
+  }
+
+  private static Operator<? extends OperatorDesc> vectorizeInterceptSelectOperator(
+      MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize,
+      Operator<? extends OperatorDesc> selectOperator) throws HiveException {
+
+    MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf();
+
+    VectorizationContext vOutContext =
+        ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext();
+
+    SelectDesc selectDesc = (SelectDesc) selectOperator.getConf();
+    List<ExprNodeDesc> selectExprs = selectDesc.getColList();
+
+    VectorExpression[] selectVectorExpr = new VectorExpression[bigTableRetainSize];
+    for (int i = 0; i < bigTableRetainSize; i++) {
+
+      TypeInfo typeInfo = selectExprs.get(i).getTypeInfo();
+      if (i < bigTableKeySize) {
+
+        // Big Table key.
+        selectVectorExpr[i] = vOutContext.getVectorExpression(selectExprs.get(i));
+      } else {
+
+        // For vector-mode, for test purposes we substitute a NO-OP (we don't want to modify
+        // the batch).
+
+        // FULL OUTER INTERCEPT does not look at non-key columns.
+        NoOpExpression noOpExpression = new NoOpExpression(i);
+
+        noOpExpression.setInputTypeInfos(typeInfo);
+        noOpExpression.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
+
+        noOpExpression.setOutputTypeInfo(typeInfo);
+        noOpExpression.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
+
+        selectVectorExpr[i] = noOpExpression;
+      }
+    }
+
+    System.out.println("*BENCHMARK* VectorSelectOperator selectVectorExpr " +
+        Arrays.toString(selectVectorExpr));
+
+    int[] projectedColumns =
+        ArrayUtils.toPrimitive(
+            vOutContext.getProjectedColumns().subList(0, bigTableRetainSize).
+ toArray(new Integer[0])); + System.out.println("*BENCHMARK* VectorSelectOperator projectedColumns " + + Arrays.toString(projectedColumns)); + + VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + vectorSelectDesc.setSelectExpressions(selectVectorExpr); + vectorSelectDesc.setProjectedOutputColumns(projectedColumns); + + Operator vectorSelectOperator = OperatorFactory.getVectorOperator( + selectOperator.getCompilationOpContext(), selectDesc, + vOutContext, vectorSelectDesc); + + return vectorSelectOperator; + } + + public static CountCollectorTestOperator addFullOuterIntercept( + MapJoinTestImplementation mapJoinImplementation, + MapJoinTestDescription testDesc, + RowTestObjectsMultiSet outputTestRowMultiSet, MapJoinTestData testData, + MapJoinOperator mapJoinOperator, MapJoinTableContainer mapJoinTableContainer, + MapJoinTableContainerSerDe mapJoinTableContainerSerDe) + throws SerDeException, IOException, HiveException { + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + // For FULL OUTER MapJoin, we require all Big Table keys to be present in the output result. + // The first N output columns are the Big Table key columns. + Map<Byte, List<ExprNodeDesc>> keyMap = mapJoinDesc.getKeys(); + List<ExprNodeDesc> bigTableKeyExprs = keyMap.get((byte) 0); + final int bigTableKeySize = bigTableKeyExprs.size(); + + Map<Byte, List<Integer>> retainMap = mapJoinDesc.getRetainList(); + List<Integer> bigTableRetainList = retainMap.get((byte) 0); + final int bigTableRetainSize = bigTableRetainList.size(); + + List<String> outputColumnNameList = mapJoinDesc.getOutputColumnNames(); + String[] mapJoinOutputColumnNames = outputColumnNameList.toArray(new String[0]); + + // Use a utility method to get the MapJoin output TypeInfo. + TypeInfo[] mapJoinOutputTypeInfos = VectorMapJoinBaseOperator.getOutputTypeInfos(mapJoinDesc); + + final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation); + + /* + * Always create a row-mode SelectOperator. If we are vector-mode, next we will use its + * expressions and replace it with a VectorSelectOperator. + */ + Operator selectOperator = + makeInterceptSelectOperator( + mapJoinOperator, bigTableKeySize, bigTableRetainSize, + mapJoinOutputColumnNames, mapJoinOutputTypeInfos); + + List<String> selectOutputColumnNameList = + ((SelectDesc) selectOperator.getConf()).getOutputColumnNames(); + String[] selectOutputColumnNames = + selectOutputColumnNameList.toArray(new String[0]); + + if (isVectorOutput) { + selectOperator = + vectorizeInterceptSelectOperator( + mapJoinOperator, bigTableKeySize, bigTableRetainSize, selectOperator); + } + + /* + * Create test description just for FULL OUTER INTERCEPT with different + * output column names and types: its Big Table is the intercept SELECT's output, + * i.e. only the retained Big Table columns. + */ + MapJoinTestDescription interceptTestDesc = + new MapJoinTestDescription( + testDesc.hiveConf, testDesc.vectorMapJoinVariation, + selectOutputColumnNames, + Arrays.copyOf(mapJoinOutputTypeInfos, bigTableRetainSize), + testDesc.bigTableKeyColumnNums, + testDesc.smallTableValueTypeInfos, + testDesc.smallTableRetainKeyColumnNums, + testDesc.smallTableGenerationParameters, + testDesc.mapJoinPlanVariation); + + MapJoinDesc intersectMapJoinDesc = + createMapJoinDesc(interceptTestDesc, /* isFullOuterIntersect */ true); + + /* + * Create FULL OUTER INTERSECT MapJoin operator. 
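+ * + * The intercept pipeline assembled below is: + * (MapJoin under test) --> (intercept SELECT: key columns kept, non-key columns NULLed/no-op'd) + * --> (FULL OUTER INTERSECT MapJoin) --> (collector operator).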
+ */ + CreateMapJoinResult interceptCreateMapJoinResult = + createMapJoinImplementation( + mapJoinImplementation, interceptTestDesc, testData, intersectMapJoinDesc); + MapJoinOperator intersectMapJoinOperator = + interceptCreateMapJoinResult.mapJoinOperator; + MapJoinTableContainer intersectMapJoinTableContainer = + interceptCreateMapJoinResult.mapJoinTableContainer; + MapJoinTableContainerSerDe interceptMapJoinTableContainerSerDe = + interceptCreateMapJoinResult.mapJoinTableContainerSerDe; + + connectOperators(mapJoinOperator, selectOperator); + + connectOperators(selectOperator, intersectMapJoinOperator); + + CountCollectorTestOperator interceptTestCollectorOperator; + if (!isVectorOutput) { + interceptTestCollectorOperator = + new TestMultiSetCollectorOperator( + interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet); + } else { + VectorizationContext vContext = + ((VectorizationContextRegion) intersectMapJoinOperator).getOutputVectorizationContext(); + int[] intersectProjectionColumns = + ArrayUtils.toPrimitive(vContext.getProjectedColumns().toArray(new Integer[0])); + interceptTestCollectorOperator = + new TestMultiSetVectorCollectorOperator( + intersectProjectionColumns, + interceptTestDesc.outputTypeInfos, + interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet); + } + + connectOperators(intersectMapJoinOperator, interceptTestCollectorOperator); + + // Setup the FULL OUTER INTERSECT MapJoin's inputObjInspector to include the Small Table, etc. + intersectMapJoinOperator.setInputObjInspectors(interceptTestDesc.inputObjectInspectors); + + // Now, invoke initializeOp methods from the root MapJoin operator. + mapJoinOperator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables container references to our test data. 
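+ // (Both MapJoin operators were created with HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD set, + // so initialize did not load hash tables; the pre-built test containers are attached here.)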
+ mapJoinOperator.setTestMapJoinTableContainer( + 1, mapJoinTableContainer, mapJoinTableContainerSerDe); + intersectMapJoinOperator.setTestMapJoinTableContainer( + 1, intersectMapJoinTableContainer, interceptMapJoinTableContainerSerDe); + + return interceptTestCollectorOperator; + } + + private static List<Operator<? extends OperatorDesc>> newOperatorList() { + return new ArrayList<Operator<? extends OperatorDesc>>(); } } \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java index d763695..4896454 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Random; import java.util.Map.Entry; @@ -27,7 +28,9 @@ import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; @@ -37,6 +40,7 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -44,107 +48,170 @@ public class MapJoinTestData { - final long bigTableRandomSeed; - final long smallTableRandomSeed; + final Random random; - final GenerateType[] generateTypes; - final VectorBatchGenerator generator; + final List generationSpecList; + final VectorRandomRowSource bigTableRowSource; + final Object[][] bigTableRandomRows; + final VectorRandomBatchSource bigTableBatchSource; public final VectorizedRowBatch bigTableBatch; - public final VectorBatchGenerateStream bigTableBatchStream; - final SmallTableGenerationParameters smallTableGenerationParameters; HashMap smallTableKeyHashMap; + List fullOuterAdditionalSmallTableKeys; + ArrayList smallTableValueCounts; ArrayList<ArrayList<RowTestObjects>> smallTableValues; public MapJoinTestData(int rowCount, MapJoinTestDescription testDesc, - long bigTableRandomSeed, long smallTableRandomSeed) throws HiveException { + long randomSeed) throws HiveException { + + random = new Random(randomSeed); + + boolean isOuterJoin = + (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER); + + generationSpecList = generationSpecListFromTypeInfos( + testDesc.bigTableTypeInfos, + testDesc.bigTableKeyColumnNums.length, + isOuterJoin); - this.bigTableRandomSeed = bigTableRandomSeed; + bigTableRowSource = new VectorRandomRowSource(); - this.smallTableRandomSeed = smallTableRandomSeed; + 
bigTableRowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, null); - generateTypes = generateTypesFromTypeInfos(testDesc.bigTableTypeInfos); - generator = new VectorBatchGenerator(generateTypes); + // UNDONE: 100000 + bigTableRandomRows = bigTableRowSource.randomRows(10); - bigTableBatch = generator.createBatch(); + bigTableBatchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + bigTableRowSource, + bigTableRandomRows, + null); + + bigTableBatch = createBigTableBatch(testDesc); // Add small table result columns. - ColumnVector[] newCols = new ColumnVector[bigTableBatch.cols.length + testDesc.smallTableValueTypeInfos.length]; + + // Only [FULL] OUTER MapJoin needs a physical column. + final int smallTableRetainKeySize = + (isOuterJoin ? testDesc.smallTableRetainKeyColumnNums.length : 0); + ColumnVector[] newCols = + new ColumnVector[ + bigTableBatch.cols.length + + smallTableRetainKeySize + + testDesc.smallTableValueTypeInfos.length]; System.arraycopy(bigTableBatch.cols, 0, newCols, 0, bigTableBatch.cols.length); + int colIndex = bigTableBatch.cols.length; + + if (isOuterJoin) { + for (int s = 0; s < smallTableRetainKeySize; s++) { + final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[s]; + newCols[colIndex++] = + VectorizedBatchUtil.createColumnVector( + testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum]); + } + } for (int s = 0; s < testDesc.smallTableValueTypeInfos.length; s++) { - newCols[bigTableBatch.cols.length + s] = + newCols[colIndex++] = VectorizedBatchUtil.createColumnVector(testDesc.smallTableValueTypeInfos[s]); } bigTableBatch.cols = newCols; bigTableBatch.numCols = newCols.length; - - // This stream will be restarted with the same random seed over and over. - bigTableBatchStream = new VectorBatchGenerateStream( - bigTableRandomSeed, generator, rowCount); - VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + VectorExtractRow keyVectorExtractRow = new VectorExtractRow(); + keyVectorExtractRow.init(testDesc.bigTableKeyTypeInfos, testDesc.bigTableKeyColumnNums); smallTableGenerationParameters = testDesc.getSmallTableGenerationParameters(); + HashMap bigTableKeyHashMap = new HashMap(); smallTableKeyHashMap = new HashMap(); - Random smallTableRandom = new Random(smallTableRandomSeed); - // Start small table random generation - // from beginning. ValueOption valueOption = smallTableGenerationParameters.getValueOption(); - int keyOutOfAThousand = smallTableGenerationParameters.getKeyOutOfAThousand(); - - bigTableBatchStream.reset(); - while (bigTableBatchStream.isNext()) { - bigTableBatch.reset(); - bigTableBatchStream.fillNext(bigTableBatch); + if (valueOption != ValueOption.NO_REGULAR_SMALL_KEYS) { + int keyOutOfAThousand = smallTableGenerationParameters.getKeyOutOfAThousand(); - final int size = bigTableBatch.size; - for (int i = 0; i < size; i++) { + bigTableBatchSource.resetBatchIteration(); + while (bigTableBatchSource.fillNextBatch(bigTableBatch)) { - if (smallTableRandom.nextInt(1000) <= keyOutOfAThousand) { + final int size = bigTableBatch.size; + for (int logical = 0; logical < size; logical++) { + final int batchIndex = + (bigTableBatch.selectedInUse ? 
bigTableBatch.selected[logical] : logical); - RowTestObjects testKey = getTestKey(bigTableBatch, i, vectorExtractRow, + RowTestObjects testKey = getTestKey(bigTableBatch, batchIndex, keyVectorExtractRow, testDesc.bigTableKeyTypeInfos.length, testDesc.bigTableObjectInspectors); + bigTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); - if (valueOption == ValueOption.ONLY_ONE) { - if (smallTableKeyHashMap.containsKey(testKey)) { - continue; + if (random.nextInt(1000) <= keyOutOfAThousand) { + + if (valueOption == ValueOption.ONLY_ONE) { + if (smallTableKeyHashMap.containsKey(testKey)) { + continue; + } } + smallTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); } - smallTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); } } } //--------------------------------------------------------------------------------------------- - // UNDONE: For now, don't add more small keys... - /* - // Add more small table keys that are not in Big Table batches. - final int smallTableAdditionalLength = 1 + random.nextInt(4); - final int smallTableAdditionalSize = smallTableAdditionalLength * maxBatchSize; - VectorizedRowBatch[] smallTableAdditionalBatches = createBigTableBatches(generator, smallTableAdditionalLength); - for (int i = 0; i < smallTableAdditionalLength; i++) { - generator.generateBatch(smallTableAdditionalBatches[i], random, maxBatchSize); + // Add more small table keys that are not in Big Table or Small Table for FULL OUTER. + + fullOuterAdditionalSmallTableKeys = new ArrayList(); + + VectorRandomRowSource altBigTableRowSource = new VectorRandomRowSource(); + + altBigTableRowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, + /* allowNull */ true, /* isUnicodeOk */ true, null); + + Object[][] altBigTableRandomRows = altBigTableRowSource.randomRows(10000); + + VectorRandomBatchSource altBigTableBatchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + altBigTableRowSource, + altBigTableRandomRows, + null); + + altBigTableBatchSource.resetBatchIteration(); + while (altBigTableBatchSource.fillNextBatch(bigTableBatch)) { + final int size = bigTableBatch.size; + for (int logical = 0; logical < size; logical++) { + final int batchIndex = + (bigTableBatch.selectedInUse ? bigTableBatch.selected[logical] : logical); + RowTestObjects testKey = getTestKey(bigTableBatch, batchIndex, keyVectorExtractRow, + testDesc.bigTableKeyTypeInfos.length, + testDesc.bigTableObjectInspectors); + if (bigTableKeyHashMap.containsKey(testKey) || + smallTableKeyHashMap.containsKey(testKey)) { + continue; + } + RowTestObjects testKeyClone = (RowTestObjects) testKey.clone(); + smallTableKeyHashMap.put(testKeyClone, -1); + fullOuterAdditionalSmallTableKeys.add(testKeyClone); + } } - TestRow[] additionalTestKeys = getTestKeys(smallTableAdditionalBatches, vectorExtractRow, - testDesc.bigTableKeyTypeInfos.length, testDesc.bigTableObjectInspectors); - final int smallTableAdditionKeyProbes = smallTableAdditionalSize / 2; - for (int i = 0; i < smallTableAdditionKeyProbes; i++) { - int index = random.nextInt(smallTableAdditionalSize); - TestRow additionalTestKey = additionalTestKeys[index]; - smallTableKeyHashMap.put((TestRow) additionalTestKey.clone(), -1); + + // Make sure there is a NULL key. 
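+ // A NULL key never equals a Big Table key, so in FULL OUTER results it should appear only + // as a non-match Small Table row whose Big Table columns are NULL.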
+ Object[] nullKeyRowObjects = new Object[testDesc.bigTableKeyTypeInfos.length]; + RowTestObjects nullTestKey = new RowTestObjects(nullKeyRowObjects); + if (!smallTableKeyHashMap.containsKey(nullTestKey)) { + smallTableKeyHashMap.put(nullTestKey, -1); + fullOuterAdditionalSmallTableKeys.add(nullTestKey); } - */ // Number the test rows with collection order. int addCount = 0; @@ -152,17 +219,28 @@ public MapJoinTestData(int rowCount, MapJoinTestDescription testDesc, testRowEntry.setValue(addCount++); } - generateVariationData(this, testDesc, smallTableRandom); + generateVariationData(this, testDesc, random); } - public VectorBatchGenerateStream getBigTableBatchStream() { - return bigTableBatchStream; + public VectorRandomBatchSource getBigTableBatchSource() { + return bigTableBatchSource; } public VectorizedRowBatch getBigTableBatch() { return bigTableBatch; } + public VectorizedRowBatch createBigTableBatch(MapJoinTestDescription testDesc) { + final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; + VectorizedRowBatch batch = new VectorizedRowBatch(bigTableColumnCount); + for (int i = 0; i < bigTableColumnCount; i++) { + batch.cols[i] = + VectorizedBatchUtil.createColumnVector( + testDesc.bigTableTypeInfos[i]); + } + return batch; + } + private RowTestObjects getTestKey(VectorizedRowBatch bigTableBatch, int batchIndex, VectorExtractRow vectorExtractRow, int columnCount, ObjectInspector[] objectInspectors) { Object[] rowObjects = new Object[columnCount]; @@ -177,37 +255,29 @@ public static void driveBigTableData(MapJoinTestDescription testDesc, MapJoinTes MapJoinOperator operator) throws HiveException { VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); - - final int columnCount = testDesc.bigTableKeyTypeInfos.length; - Object[] row = new Object[columnCount]; + vectorExtractRow.init(testDesc.bigTableTypeInfos); - testData.bigTableBatchStream.reset(); - while (testData.bigTableBatchStream.isNext()) { - testData.bigTableBatch.reset(); - testData.bigTableBatchStream.fillNext(testData.bigTableBatch); - - // Extract rows and call process per row - final int size = testData.bigTableBatch.size; - for (int r = 0; r < size; r++) { - vectorExtractRow.extractRow(testData.bigTableBatch, r, row); - operator.process(row, 0); - } + Object[][] bigTableRandomRows = testData.bigTableRandomRows; + final int rowCount = bigTableRandomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = bigTableRandomRows[i]; + operator.process(row, 0); } - operator.closeOp(false); + + // Close the operator tree. + operator.close(false); } public static void driveVectorBigTableData(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinOperator operator) throws HiveException { - testData.bigTableBatchStream.reset(); - while (testData.bigTableBatchStream.isNext()) { - testData.bigTableBatch.reset(); - testData.bigTableBatchStream.fillNext(testData.bigTableBatch); - + testData.bigTableBatchSource.resetBatchIteration(); + while (testData.bigTableBatchSource.fillNextBatch(testData.bigTableBatch)) { operator.process(testData.bigTableBatch, 0); } - operator.closeOp(false); + + // Close the operator tree. 
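+ // Unlike closeOp(), close() cascades to the child operators, so results produced only at + // close time (e.g. FULL OUTER non-match Small Table rows) still reach the collectors.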
+ operator.close(false); } public static void generateVariationData(MapJoinTestData testData, @@ -219,6 +289,7 @@ public static void generateVariationData(MapJoinTestData testData, break; case INNER: case OUTER: + case FULL_OUTER: testData.generateRandomSmallTableCounts(testDesc, random); testData.generateRandomSmallTableValues(testDesc, random); break; @@ -230,10 +301,15 @@ public static void generateVariationData(MapJoinTestData testData, private static RowTestObjects generateRandomSmallTableValueRow(MapJoinTestDescription testDesc, Random random) { final int columnCount = testDesc.smallTableValueTypeInfos.length; - Object[] smallTableValueRow = VectorRandomRowSource.randomWritablePrimitiveRow(columnCount, random, - testDesc.smallTableValuePrimitiveTypeInfos); + PrimitiveTypeInfo[] primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + for (int i = 0; i < columnCount; i++) { + primitiveTypeInfos[i] = (PrimitiveTypeInfo) testDesc.smallTableValueTypeInfos[i]; + } + Object[] smallTableValueRow = + VectorRandomRowSource.randomWritablePrimitiveRow( + columnCount, random, primitiveTypeInfos); for (int c = 0; c < smallTableValueRow.length; c++) { - smallTableValueRow[c] = ((PrimitiveObjectInspector) testDesc.smallTableObjectInspectors[c]).copyObject(smallTableValueRow[c]); + smallTableValueRow[c] = ((PrimitiveObjectInspector) testDesc.smallTableValueObjectInspectors[c]).copyObject(smallTableValueRow[c]); } return new RowTestObjects(smallTableValueRow); } @@ -241,7 +317,7 @@ private static RowTestObjects generateRandomSmallTableValueRow(MapJoinTestDescri private void generateRandomSmallTableCounts(MapJoinTestDescription testDesc, Random random) { smallTableValueCounts = new ArrayList(); for (Entry testKeyEntry : smallTableKeyHashMap.entrySet()) { - final int valueCount = 1 + random.nextInt(19); + final int valueCount = 1 + random.nextInt(3); smallTableValueCounts.add(valueCount); } } @@ -258,15 +334,26 @@ private void generateRandomSmallTableValues(MapJoinTestDescription testDesc, Ran } } - private static GenerateType[] generateTypesFromTypeInfos(TypeInfo[] typeInfos) { + private static List generationSpecListFromTypeInfos(TypeInfo[] typeInfos, + int keyCount, boolean isOuterJoin) { + + List generationSpecList = new ArrayList(); + final int size = typeInfos.length; - GenerateType[] generateTypes = new GenerateType[size]; for (int i = 0; i < size; i++) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfos[i]; - GenerateCategory category = - GenerateCategory.generateCategoryFromPrimitiveCategory(primitiveTypeInfo.getPrimitiveCategory()); - generateTypes[i] = new GenerateType(category); + TypeInfo typeInfo = typeInfos[i]; + final boolean columnAllowNulls; + if (i >= keyCount) { + + // Value columns can be NULL. + columnAllowNulls = true; + } else { + + // Non-OUTER JOIN operators expect NULL keys to have been filtered out. 
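+ // So NULL keys are generated only for the OUTER and FULL_OUTER variations, where they + // are legal input.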
+ columnAllowNulls = isOuterJoin; + } + generationSpecList.add(GenerationSpec.createSameType(typeInfo, columnAllowNulls)); } - return generateTypes; + return generationSpecList; } } \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java index bde4424..b8a08fd 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -27,20 +28,23 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class MapJoinTestDescription extends DescriptionTest { + public static enum MapJoinPlanVariation { + DYNAMIC_PARTITION_HASH_JOIN, + SHARED_SMALL_TABLE + } + public static class SmallTableGenerationParameters { public static enum ValueOption { NO_RESTRICTION, ONLY_ONE, - ONLY_TWO, - AT_LEAST_TWO + NO_REGULAR_SMALL_KEYS } private ValueOption valueOption; @@ -82,70 +86,103 @@ public int getNoMatchKeyOutOfAThousand() { final VectorMapJoinVariation vectorMapJoinVariation; // Adjustable. - public String[] bigTableColumnNames; + public String[] bigTableKeyColumnNames; public TypeInfo[] bigTableTypeInfos; + public int[] bigTableKeyColumnNums; - public String[] smallTableValueColumnNames; + public TypeInfo[] smallTableValueTypeInfos; - public int[] bigTableRetainColumnNums; + public int[] smallTableRetainKeyColumnNums; - public int[] smallTableRetainValueColumnNums; public SmallTableGenerationParameters smallTableGenerationParameters; // Derived. 
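+ // (All fields below are computed by computeDerived() from the adjustable fields above.)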
- public List bigTableColumnNamesList; - public String[] bigTableKeyColumnNames; - public TypeInfo[] bigTableKeyTypeInfos; - public List smallTableValueColumnNamesList; + + public int[] bigTableColumnNums; + public String[] bigTableColumnNames; + public List bigTableColumnNameList; public ObjectInspector[] bigTableObjectInspectors; - public List bigTableObjectInspectorsList; + public List bigTableObjectInspectorList; + + public TypeInfo[] bigTableKeyTypeInfos; + + public List smallTableKeyColumnNameList; + public String[] smallTableKeyColumnNames; + public TypeInfo[] smallTableKeyTypeInfos; + public ObjectInspector[] smallTableKeyObjectInspectors; + public List smallTableKeyObjectInspectorList; + + public List smallTableValueColumnNameList; + public String[] smallTableValueColumnNames; + public ObjectInspector[] smallTableValueObjectInspectors; + public List smallTableValueObjectInspectorList; + + public int[] bigTableRetainColumnNums; + public int[] smallTableRetainValueColumnNums; + + public String[] smallTableColumnNames; + public List smallTableColumnNameList; + public TypeInfo[] smallTableTypeInfos; + public List smallTableObjectInspectorList; + public StandardStructObjectInspector bigTableStandardObjectInspector; - public PrimitiveTypeInfo[] smallTableValuePrimitiveTypeInfos; - public ObjectInspector[] smallTableObjectInspectors; - public PrimitiveCategory[] smallTablePrimitiveCategories; - public List smallTableObjectInspectorsList; public StandardStructObjectInspector smallTableStandardObjectInspector; public ObjectInspector[] inputObjectInspectors; + public String[] outputColumnNames; public TypeInfo[] outputTypeInfos; public ObjectInspector[] outputObjectInspectors; + final MapJoinPlanVariation mapJoinPlanVariation; + + public MapJoinTestDescription ( + HiveConf hiveConf, + VectorMapJoinVariation vectorMapJoinVariation, + TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { + this( + hiveConf, + vectorMapJoinVariation, + /* bigTableColumnNames */ null, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + } + public MapJoinTestDescription ( HiveConf hiveConf, VectorMapJoinVariation vectorMapJoinVariation, - String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, + String[] bigTableColumnNames, + TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, - String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, - int[] bigTableRetainColumnNums, - int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, - SmallTableGenerationParameters smallTableGenerationParameters) { + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { super(hiveConf); + this.vectorMapJoinVariation = vectorMapJoinVariation; this.bigTableColumnNames = bigTableColumnNames; this.bigTableTypeInfos = bigTableTypeInfos; this.bigTableKeyColumnNums = bigTableKeyColumnNums; - this.smallTableValueColumnNames = smallTableValueColumnNames; + this.smallTableValueTypeInfos = smallTableValueTypeInfos; - this.bigTableRetainColumnNums = bigTableRetainColumnNums; - this.smallTableRetainKeyColumnNums = smallTableRetainKeyColumnNums; - 
this.smallTableRetainValueColumnNums = smallTableRetainValueColumnNums; + + this.smallTableRetainKeyColumnNums = smallTableRetainKeyColumnNums; this.smallTableGenerationParameters = smallTableGenerationParameters; - switch (vectorMapJoinVariation) { - case INNER_BIG_ONLY: - case LEFT_SEMI: - trimAwaySmallTableValueInfo(); - break; - case INNER: - case OUTER: - break; - default: - throw new RuntimeException("Unknown operator variation " + vectorMapJoinVariation); - } + this.mapJoinPlanVariation = mapJoinPlanVariation; computeDerived(); } @@ -155,45 +192,121 @@ public SmallTableGenerationParameters getSmallTableGenerationParameters() { } public void computeDerived() { - bigTableColumnNamesList = Arrays.asList(bigTableColumnNames); - bigTableKeyColumnNames = new String[bigTableKeyColumnNums.length]; - bigTableKeyTypeInfos = new TypeInfo[bigTableKeyColumnNums.length]; - for (int i = 0; i < bigTableKeyColumnNums.length; i++) { - bigTableKeyColumnNames[i] = bigTableColumnNames[bigTableKeyColumnNums[i]]; - bigTableKeyTypeInfos[i] = bigTableTypeInfos[bigTableKeyColumnNums[i]]; + final int bigTableSize = bigTableTypeInfos.length; + + if (bigTableColumnNames == null) { + + // Automatically populate. + bigTableColumnNames = new String[bigTableSize]; + for (int i = 0; i < bigTableSize; i++) { + bigTableColumnNames[i] = "_col" + i; + } } - smallTableValueColumnNamesList = Arrays.asList(smallTableValueColumnNames); + // Automatically populate. + bigTableColumnNums = new int[bigTableSize]; - bigTableObjectInspectors = new ObjectInspector[bigTableTypeInfos.length]; - for (int i = 0; i < bigTableTypeInfos.length; i++) { + for (int i = 0; i < bigTableSize; i++) { + bigTableColumnNums[i] = i; + } + + // Automatically populate. + bigTableRetainColumnNums = new int[bigTableSize]; + for (int i = 0; i < bigTableSize; i++) { + bigTableRetainColumnNums[i] = i; + } + + /* + * Big Table key information. + */ + final int keySize = bigTableKeyColumnNums.length; + + bigTableKeyColumnNames = new String[keySize]; + bigTableKeyTypeInfos = new TypeInfo[keySize]; + for (int i = 0; i < keySize; i++) { + final int bigTableKeyColumnNum = bigTableKeyColumnNums[i]; + bigTableKeyColumnNames[i] = bigTableColumnNames[bigTableKeyColumnNum]; + bigTableKeyTypeInfos[i] = bigTableTypeInfos[bigTableKeyColumnNum]; + } + + /* + * Big Table object inspectors. + */ + bigTableObjectInspectors = new ObjectInspector[bigTableSize]; + for (int i = 0; i < bigTableSize; i++) { bigTableObjectInspectors[i] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((PrimitiveTypeInfo) bigTableTypeInfos[i]); + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) bigTableTypeInfos[i]); + } + bigTableColumnNameList = Arrays.asList(bigTableColumnNames); + bigTableObjectInspectorList = Arrays.asList(bigTableObjectInspectors); + + /* + * Small Table key object inspectors are derived directly from the Big Table key information. 
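+ * The key types must agree on both sides of the join, so the Big Table key TypeInfos and + * ObjectInspectors are reused as-is; only fresh "_col" names are generated.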
+ */ + smallTableKeyColumnNames = new String[keySize]; + smallTableKeyTypeInfos = Arrays.copyOf(bigTableKeyTypeInfos, keySize); + smallTableKeyObjectInspectors = new ObjectInspector[keySize]; + for (int i = 0; i < keySize; i++) { + smallTableKeyColumnNames[i] = "_col" + i; + final int bigTableKeyColumnNum = bigTableKeyColumnNums[i]; + smallTableKeyObjectInspectors[i] = bigTableObjectInspectors[bigTableKeyColumnNum]; + } + smallTableKeyColumnNameList = Arrays.asList(smallTableKeyColumnNames); + smallTableKeyObjectInspectorList = Arrays.asList(smallTableKeyObjectInspectors); + + // First part of Small Table information is the key information. + smallTableColumnNameList = new ArrayList(smallTableKeyColumnNameList); + List smallTableTypeInfoList = + new ArrayList(Arrays.asList(smallTableKeyTypeInfos)); + smallTableObjectInspectorList = new ArrayList(); + smallTableObjectInspectorList.addAll(smallTableKeyObjectInspectorList); + + final int valueSize = smallTableValueTypeInfos.length; + + // Automatically populate. + smallTableValueColumnNames = new String[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableValueColumnNames[i] = "_col" + (keySize + i); } - bigTableObjectInspectorsList = Arrays.asList(bigTableObjectInspectors); - smallTableObjectInspectors = new ObjectInspector[smallTableValueTypeInfos.length]; - smallTablePrimitiveCategories = new PrimitiveCategory[smallTableValueTypeInfos.length]; - smallTableValuePrimitiveTypeInfos = new PrimitiveTypeInfo[smallTableValueTypeInfos.length]; - for (int i = 0; i < smallTableValueTypeInfos.length; i++) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) smallTableValueTypeInfos[i]; - smallTableObjectInspectors[i] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo); - smallTablePrimitiveCategories[i] = primitiveTypeInfo.getPrimitiveCategory(); - smallTableValuePrimitiveTypeInfos[i] = primitiveTypeInfo; + smallTableValueObjectInspectors = new ObjectInspector[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableValueObjectInspectors[i] = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) smallTableValueTypeInfos[i]); } - smallTableObjectInspectorsList = Arrays.asList(smallTableObjectInspectors); + smallTableValueColumnNameList = Arrays.asList(smallTableValueColumnNames); + smallTableTypeInfoList.addAll(Arrays.asList(smallTableValueTypeInfos)); + smallTableValueObjectInspectorList = Arrays.asList(smallTableValueObjectInspectors); + smallTableColumnNameList.addAll(smallTableValueColumnNameList); + smallTableColumnNames = smallTableColumnNameList.toArray(new String[0]); + smallTableTypeInfos = smallTableTypeInfoList.toArray(new TypeInfo[0]); + + smallTableObjectInspectorList.addAll(smallTableValueObjectInspectorList); + + /* + * The inputObjectInspectors describe the keys and values of the Big Table and Small Table. 
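+ * Position 0 is the Big Table and position 1 is the Small Table, matching the tag value + * the test drivers pass to MapJoinOperator.process().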
+ */ bigTableStandardObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - bigTableColumnNamesList, Arrays.asList((ObjectInspector[]) bigTableObjectInspectors)); + bigTableColumnNameList, bigTableObjectInspectorList); smallTableStandardObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - smallTableValueColumnNamesList, Arrays.asList((ObjectInspector[]) smallTableObjectInspectors)); + smallTableColumnNameList, smallTableObjectInspectorList); inputObjectInspectors = - new ObjectInspector[] { bigTableStandardObjectInspector, smallTableStandardObjectInspector }; + new ObjectInspector[] { + bigTableStandardObjectInspector, smallTableStandardObjectInspector }; + + // For now, we always retain the Small Table values... + // Automatically populate. + smallTableRetainValueColumnNums = new int[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableRetainValueColumnNums[i] = i; + } int outputLength = bigTableRetainColumnNums.length + @@ -203,12 +316,13 @@ public void computeDerived() { outputTypeInfos = new TypeInfo[outputLength]; int outputIndex = 0; - for (int i = 0; i < bigTableRetainColumnNums.length; i++) { + final int bigTableRetainSize = bigTableRetainColumnNums.length; + for (int i = 0; i < bigTableRetainSize; i++) { outputTypeInfos[outputIndex++] = bigTableTypeInfos[bigTableRetainColumnNums[i]]; } - // for (int i = 0; i < smallTableRetainKeyColumnNums.length; i++) { - // outputTypeInfos[outputIndex++] = smallTableTypeInfos[smallTableRetainKeyColumnNums[i]]; - // } + for (int i = 0; i < smallTableRetainKeyColumnNums.length; i++) { + outputTypeInfos[outputIndex++] = smallTableKeyTypeInfos[smallTableRetainKeyColumnNums[i]]; + } for (int i = 0; i < smallTableRetainValueColumnNums.length; i++) { outputTypeInfos[outputIndex++] = smallTableValueTypeInfos[smallTableRetainValueColumnNums[i]]; } @@ -221,13 +335,6 @@ public void computeDerived() { } } - public void trimAwaySmallTableValueInfo() { - smallTableValueColumnNames = new String[] {}; - smallTableValueTypeInfos = new TypeInfo[] {}; - smallTableRetainKeyColumnNums = new int[] {}; - smallTableRetainValueColumnNums = new int[] {}; - } - private String[] createOutputColumnNames(int outputColumnCount) { String[] outputColumnNames = new String[outputColumnCount]; int counter = 1; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java new file mode 100644 index 0000000..fdd0342 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; + +/** + * A test expression whose evaluate() intentionally does nothing: the input column is + * passed through to the output untouched. + */ +public class NoOpExpression extends VectorExpression { + + private static final long serialVersionUID = 1L; + + public NoOpExpression() { + } + + public NoOpExpression(int colNum) { + super(colNum); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + } + + @Override + public String vectorExpressionParameters() { + return "noOpCol" + outputColumnNum + ":" + + getTypeName(outputTypeInfo, outputDataTypePhysicalVariation); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()).build(); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java index 4c41f9c..6647e77 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java @@ -48,8 +48,10 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; @@ -57,8 +59,13 @@ import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; @@ -69,7 +76,6 @@ import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import 
org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; @@ -86,14 +92,13 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.BytesWritable; @@ -101,6 +106,7 @@ import org.apache.hive.common.util.HashCodeUtil; import org.apache.hive.common.util.ReflectionUtil; import org.junit.Test; +import org.junit.Ignore; import java.io.IOException; import java.util.ArrayList; @@ -120,233 +126,1355 @@ public class TestMapJoinOperator { - /* - * This test collector operator is for MapJoin row-mode. - */ - private class TestMultiSetCollectorOperator extends RowCollectorTestOperator { + private boolean addLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { + + // Set defaults. + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, false); + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1); + + switch (hiveConfVariation) { + case 0: + break; + case 1: + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, true); + break; + case 2: + // Force generateHashMapResultLargeMultiValue to be used. 
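+ // A small positive threshold (the default -1 disables it) makes repeated keys overflow + // into the large multi-value hash map result path, giving that code path test coverage.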
+ HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5); + break; + default: + return false; + } + return true; + } + + private boolean goodTestVariation(MapJoinTestDescription testDesc) { + final int smallTableValueSize = testDesc.smallTableRetainValueColumnNums.length; + + switch (testDesc.vectorMapJoinVariation) { + case INNER: + return (smallTableValueSize > 0); + case INNER_BIG_ONLY: + case LEFT_SEMI: + return (smallTableValueSize == 0); + case OUTER: + return true; + case FULL_OUTER: + return true; + default: + throw new RuntimeException( + "Unexpected vectorMapJoinVariation " + testDesc.vectorMapJoinVariation); + } + + } + + @Ignore + @Test + public void testLong0() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong0"); + + return false; + } + + @Ignore + @Test + public void testLong0_NoRegularKeys() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] 
bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "doTestLong0_NoRegularKeys"); + + return false; + } + + @Ignore + @Test + public void testLong1() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong1( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong1(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, long value; Small Table: no key retained, string value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong1"); + + return false; + } + + @Test + public void testLong2() throws Exception { + long seed = 3553; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong2( + seed, rowCount, 
hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong2(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: short key, no value; Small Table: key retained, timestamp value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.timestampTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong2"); + + return false; + } + + + @Test + public void testLong3() throws Exception { + long seed = 9934; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + 
+ executeTest(testDesc, testData, "testLong3"); + + return false; + } + + @Test + public void testLong3_NoRegularKeys() throws Exception { + long seed = 9934; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "doTestLong3_NoRegularKeys"); + + return false; + } + + @Test + public void testLong4() throws Exception { + long seed = 3982; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong4( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong4(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, no value; Small Table: no key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + 
TypeInfoFactory.intTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong4"); + + return false; + } + + @Test + public void testLong5() throws Exception { + long seed = 3553; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong5( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong5(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + // Cause there to be no regular FULL OUTER MapJoin MATCHes so only non-match Small Table + // results. + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong5"); + + return false; + } + + @Test + public void testLong6() throws Exception { + long seed = 9384; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong6( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong6(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] 
bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + // Cause there to be no regular FULL OUTER MapJoin MATCHes so only non-match Small Table + // results. + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, timestamp value; Small Table: key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.timestampTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong6"); + + return false; + } + + private boolean addNonLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { + + // Set defaults. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1); + + switch (hiveConfVariation) { + case 0: + break; + case 1: + // Force generateHashMapResultLargeMultiValue to be used. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5); + break; + default: + return false; + } + return true; + } + + @Test + public void testMultiKey0() throws Exception { + long seed = 28322; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestMultiKey0(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); + + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Two key columns.
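+ // (Multi-column keys cannot use the single-long key hash table, so the testMultiKey* tests exercise the serialized multi-key path; only the non-long HiveConf variations defined in addNonLongHiveConfVariation above apply here.)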
+ bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.intTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1}; + + smallTableRetainKeyColumnNums = new int[] {0, 1}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testMultiKey0"); + + return false; + } + + @Test + public void testMultiKey1() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestMultiKey1(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - private final RowTestObjectsMultiSet testRowMultiSet; + int rowCount = 10; - public TestMultiSetCollectorOperator( - ObjectInspector[] outputObjectInspectors, - RowTestObjectsMultiSet testRowMultiSet) { - super(outputObjectInspectors); - this.testRowMultiSet = testRowMultiSet; - } + HiveConf hiveConf = new HiveConf(); - public RowTestObjectsMultiSet getTestRowMultiSet() { - return testRowMultiSet; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; } - public void nextTestRow(RowTestObjects testRow) { - testRowMultiSet.add(testRow); - } + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.timestampTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; + + smallTableValueTypeInfos = + new TypeInfo[] {new DecimalTypeInfo(38, 18)}; + + //---------------------------------------------------------------------------------------------- - @Override - public String getName() { - return TestMultiSetCollectorOperator.class.getSimpleName(); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; } + + // Prepare data. Good for ANY implementation variation. 
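+ // (The same generated rows are replayed through every MapJoinTestImplementation, so row-mode and vectorized results are all verified against one shared expected multi-set.)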
+ testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testMultiKey1"); + + return false; } - private class TestMultiSetVectorCollectorOperator extends RowVectorCollectorTestOperator { + @Test + public void testMultiKey2() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } - private final RowTestObjectsMultiSet testRowMultiSet; + public boolean doTestMultiKey2(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - public RowTestObjectsMultiSet getTestRowMultiSet() { - return testRowMultiSet; - } + int rowCount = 10; - public TestMultiSetVectorCollectorOperator(TypeInfo[] outputTypeInfos, - ObjectInspector[] outputObjectInspectors, RowTestObjectsMultiSet testRowMultiSet) - throws HiveException { - super(outputTypeInfos, outputObjectInspectors); - this.testRowMultiSet = testRowMultiSet; - } + HiveConf hiveConf = new HiveConf(); - public void nextTestRow(RowTestObjects testRow) { - testRowMultiSet.add(testRow); + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; } - @Override - public String getName() { - return TestMultiSetVectorCollectorOperator.class.getSimpleName(); - } - } + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; - private static class KeyConfig { - long seed; - PrimitiveTypeInfo primitiveTypeInfo; - KeyConfig(long seed, PrimitiveTypeInfo primitiveTypeInfo) { - this.seed = seed; - this.primitiveTypeInfo = primitiveTypeInfo; + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; } + + // Prepare data. Good for ANY implementation variation. 
+ testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testMultiKey2"); + + return false; } - private static KeyConfig[] longKeyConfigs = new KeyConfig[] { - new KeyConfig(234882L, TypeInfoFactory.longTypeInfo), - new KeyConfig(4600L, TypeInfoFactory.intTypeInfo), - new KeyConfig(98743L, TypeInfoFactory.shortTypeInfo)}; @Test - public void testLong() throws Exception { - for (KeyConfig longKeyConfig : longKeyConfigs) { + public void testMultiKey3() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; - } - doTestLong(longKeyConfig.seed, longKeyConfig.primitiveTypeInfo, vectorMapJoinVariation); + hiveConfVariationsDone = + doTestMultiKey3( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); } - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestLong(long seed, TypeInfo numberTypeInfo, - VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestMultiKey3(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"number1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.longTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo}; + int[] bigTableKeyColumnNums = null; - int[] bigTableRetainColumnNums = new int[] {0}; + TypeInfo[] smallTableValueTypeInfos = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Two key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.dateTypeInfo, + TypeInfoFactory.byteTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1}; + + smallTableRetainKeyColumnNums = new int[] {0, 1}; + + smallTableValueTypeInfos = + new TypeInfo[] {}; + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data.
Good for ANY implementation variation. - MapJoinTestData testData = - new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + testData = + new MapJoinTestData(rowCount, testDesc, seed); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testMultiKey3"); + + return false; } @Test - public void testMultiKey() throws Exception { + public void testString0() throws Exception { long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); } - doTestMultiKey(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestMultiKey(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString0(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1", "b2", "b3"}; - TypeInfo[] bigTableTypeInfos = + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One plain STRING key column. + bigTableTypeInfos = new TypeInfo[] { - TypeInfoFactory.intTypeInfo, - TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0, 1, 2}; + bigTableKeyColumnNums = new int[] {0}; - String[] smallTableValueColumnNames = new String[] {"sv1"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + // Prepare data. Good for ANY implementation variation. 
+ testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testString0"); + + return false; + } + + @Test + public void testString1() throws Exception { + long seed = 3422; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestString1(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); + + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; - int[] bigTableRetainColumnNums = new int[] {0, 1, 2}; + int[] bigTableKeyColumnNums = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0}; + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One BINARY key column. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.binaryTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.floatTypeInfo, + new DecimalTypeInfo(38, 18)}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. 
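+ // (BINARY keys are presumably served by the same bytes-keyed hash tables as STRING keys; the short/float/decimal Small Table values here check value deserialization across type widths.)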
- MapJoinTestData testData = - new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + testData = + new MapJoinTestData(rowCount, testDesc, seed); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testString1"); + + return false; } @Test - public void testString() throws Exception { - long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + public void testString2() throws Exception { + long seed = 7439; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); } - doTestString(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestString(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString2(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + TypeInfo[] bigTableTypeInfos = null; - int[] bigTableRetainColumnNums = new int[] {0}; + int[] bigTableKeyColumnNums = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One STRING key column; Small Table value: NONE (tests INNER_BIG_ONLY, LEFT_SEMI). + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. 
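+ // (With no Small Table value columns, INNER_BIG_ONLY and LEFT_SEMI reduce to pure key-existence checks against the Small Table.)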
- MapJoinTestData testData = - new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testString2"); - executeTest(testDesc, testData); + return false; } private void addBigTableRetained(MapJoinTestDescription testDesc, Object[] bigTableRowObjects, @@ -357,14 +1485,32 @@ private void addBigTableRetained(MapJoinTestDescription testDesc, Object[] bigTa } } - private void addToOutput(MapJoinTestDescription testDesc, RowTestObjectsMultiSet expectedTestRowMultiSet, - Object[] outputObjects) { + private void addToOutput(MapJoinTestDescription testDesc, + RowTestObjectsMultiSet expectedTestRowMultiSet, Object[] outputObjects, + RowTestObjectsMultiSet.RowFlag rowFlag) { for (int c = 0; c < outputObjects.length; c++) { - PrimitiveObjectInspector primitiveObjInsp = ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); + PrimitiveObjectInspector primitiveObjInsp = + ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); Object outputObject = outputObjects[c]; outputObjects[c] = primitiveObjInsp.copyObject(outputObject); } - expectedTestRowMultiSet.add(new RowTestObjects(outputObjects)); + expectedTestRowMultiSet.add(new RowTestObjects(outputObjects), rowFlag); + } + + private String rowToCsvString(Object[] rowObjects) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < rowObjects.length; i++) { + if (sb.length() > 0) { + sb.append(","); + } + Object obj = rowObjects[i]; + if (obj == null) { + sb.append("\\N"); + } else { + sb.append(obj); + } + } + return sb.toString(); } /* @@ -377,7 +1523,7 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet(); VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + vectorExtractRow.init(testDesc.bigTableTypeInfos); final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; Object[] bigTableRowObjects = new Object[bigTableColumnCount]; @@ -385,32 +1531,36 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript final int bigTableKeyColumnCount = testDesc.bigTableKeyTypeInfos.length; Object[] bigTableKeyObjects = new Object[bigTableKeyColumnCount]; - VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream(); + VectorRandomBatchSource bigTableBatchSource = testData.getBigTableBatchSource(); VectorizedRowBatch batch = testData.getBigTableBatch(); - bigTableBatchStream.reset(); - while (bigTableBatchStream.isNext()) { - batch.reset(); - bigTableBatchStream.fillNext(batch); + bigTableBatchSource.resetBatchIteration(); + while (bigTableBatchSource.fillNextBatch(batch)) { final int size = testData.bigTableBatch.size; for (int r = 0; r < size; r++) { vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects); // Form key object array + boolean hasAnyNulls = false; // NULLs may be present in {FULL|LEFT|RIGHT} OUTER joins. 
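+ // Per SQL join semantics, a NULL key column never compares equal: in big FULL OUTER JOIN small ON big.k = small.k, a Big Table row with k = NULL matches nothing and must surface as a non-match row below.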
for (int k = 0; k < bigTableKeyColumnCount; k++) { int keyColumnNum = testDesc.bigTableKeyColumnNums[k]; - bigTableKeyObjects[k] = bigTableRowObjects[keyColumnNum]; + Object keyObject = bigTableRowObjects[keyColumnNum]; + if (keyObject == null) { + hasAnyNulls = true; + } + bigTableKeyObjects[k] = keyObject; bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableKeyObjects[k]); } RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects); - if (testData.smallTableKeyHashMap.containsKey(testKey)) { + if (testData.smallTableKeyHashMap.containsKey(testKey) && !hasAnyNulls) { int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey); switch (testDesc.vectorMapJoinVariation) { case INNER: case OUTER: + case FULL_OUTER: { // One row per value. ArrayList valueList = testData.smallTableValues.get(smallTableKeyIndex); @@ -420,36 +1570,46 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + Object[] valueRow = valueList.get(v).getRow(); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } } break; case INNER_BIG_ONLY: - { - // Value count rows. - final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex); - for (int v = 0; v < valueCount; v++) { - Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; - - addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); - } - } - break; case LEFT_SEMI: { - // One row (existence). Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } break; default: @@ -458,9 +1618,10 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript } else { - // No match. + // Big Table non-match. 
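+ // (OUTER and FULL_OUTER still emit one result row per unmatched Big Table row, with NULL substituted for every retained Small Table key and value column.)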
- if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) { + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { // We need to add a non-match row with nulls for small table values. @@ -468,14 +1629,74 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = null; + } + + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = null; + outputObjects[outputColumnNum++] = null; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.LEFT_OUTER); + } + } + } + } + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + + System.out.println("*BENCHMARK* ----------------------------------------------------------------------"); + System.out.println("*BENCHMARK* FULL OUTER non-match key count " + + testData.fullOuterAdditionalSmallTableKeys.size()); + + // Fill in non-match Small Table key results. + for (RowTestObjects smallTableKey : testData.fullOuterAdditionalSmallTableKeys) { + + System.out.println( + "*BENCHMARK* fullOuterAdditionalSmallTableKey " + smallTableKey.toString()); + + int smallTableKeyIndex = testData.smallTableKeyHashMap.get(smallTableKey); + + // One row per value. + ArrayList valueList = testData.smallTableValues.get(smallTableKeyIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; + + // Non-match Small Table keys produce NULL Big Table columns. + final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; + for (int o = 0; o < bigTableRetainColumnNumsLength; o++) { + outputObjects[o] = null; + } + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + // The output result may include 0, 1, or more small key columns... 
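+ // (smallTableRetainKeyColumnNums selects exactly which Small Table key columns are projected into these FULL OUTER non-match rows.)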
+ Object[] smallKeyObjects = smallTableKey.getRow(); + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + smallKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + + Object[] valueRow = valueList.get(v).getRow(); + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; + for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.FULL_OUTER); } } } @@ -483,67 +1704,333 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript return expectedTestRowMultiSet; } - private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData) throws Exception { + private void generateBigAndSmallTableRowLogLines(MapJoinTestDescription testDesc, + MapJoinTestData testData) throws HiveException { + + // Generate Big Table rows log lines... + VectorExtractRow vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init(testDesc.bigTableTypeInfos); + + final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; + Object[] bigTableRowObjects = new Object[bigTableColumnCount]; + + VectorRandomBatchSource bigTableBatchSource = testData.getBigTableBatchSource(); + VectorizedRowBatch batch = testData.getBigTableBatch(); + while (bigTableBatchSource.fillNextBatch(batch)) { + + final int size = testData.bigTableBatch.size; + for (int r = 0; r < size; r++) { + vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects); + + System.out.println("*BIG TABLE* " + rowToCsvString(bigTableRowObjects)); + } + } + + // Generate Small Table rows log lines... + final int keyKeyColumnNumsLength = + testDesc.bigTableKeyColumnNums.length; + final int smallTableRetainValueLength = + testDesc.smallTableRetainValueColumnNums.length; + final int smallTableLength = keyKeyColumnNumsLength + smallTableRetainValueLength; + for (Entry entry : testData.smallTableKeyHashMap.entrySet()) { + if (smallTableRetainValueLength == 0) { + Object[] smallTableRowObjects = entry.getKey().getRow(); + System.out.println("*SMALL TABLE* " + rowToCsvString(smallTableRowObjects)); + } else { + Integer valueIndex = entry.getValue(); + ArrayList valueList = testData.smallTableValues.get(valueIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] smallTableRowObjects = new Object[smallTableLength]; + System.arraycopy(entry.getKey().getRow(), 0, smallTableRowObjects, 0, keyKeyColumnNumsLength); + int outputColumnNum = keyKeyColumnNumsLength; + Object[] valueRow = valueList.get(v).getRow(); + for (int o = 0; o < smallTableRetainValueLength; o++) { + smallTableRowObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + } + System.out.println("*SMALL TABLE* " + rowToCsvString(smallTableRowObjects)); + } + } + } + } + + private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + + // So stack trace is self-explanatory. 
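+ // (The per-variation executeTest* wrappers below delegate unchanged to doExecuteTest; they exist only so a JUnit failure stack trace names the join variation.)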
+ switch (testDesc.vectorMapJoinVariation) { + case INNER: + executeTestInner(testDesc, testData, title); + break; + case INNER_BIG_ONLY: + executeTestInnerBigOnly(testDesc, testData, title); + break; + case LEFT_SEMI: + executeTestLeftSemi(testDesc, testData, title); + break; + case OUTER: + executeTestOuter(testDesc, testData, title); + break; + case FULL_OUTER: + executeTestFullOuter(testDesc, testData, title); + break; + default: + throw new RuntimeException("Unexpected Vector MapJoin variation " + + testDesc.vectorMapJoinVariation); + } + } + + private void executeTestInner(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestInnerBigOnly(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestLeftSemi(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestFullOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void doExecuteTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { RowTestObjectsMultiSet expectedTestRowMultiSet = createExpectedTestRowMultiSet(testDesc, testData); - // UNDONE: Inner count - System.out.println("*BENCHMARK* expectedTestRowMultiSet rowCount " + expectedTestRowMultiSet.getRowCount() + - " totalCount " + expectedTestRowMultiSet.getTotalCount()); + generateBigAndSmallTableRowLogLines(testDesc, testData); + + System.out.println("*BENCHMARK* expectedTestRowMultiSet " + + " totalKeyCount " + expectedTestRowMultiSet.getTotalKeyCount() + + " totalValueCount " + expectedTestRowMultiSet.getTotalValueCount()); // Execute all implementation variations. for (MapJoinTestImplementation mapJoinImplementation : MapJoinTestImplementation.values()) { - executeTestImplementation(mapJoinImplementation, testDesc, testData, - expectedTestRowMultiSet); + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Key match tracking not supported in plain Java HashMap. 
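+ // (FULL OUTER must enumerate the non-matched Small Table keys afterward, which requires MatchTracker support; the row-mode Java HashMap has no such tracking.)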
+ continue; + } + switch (mapJoinImplementation) { + case ROW_MODE_HASH_MAP: + executeRowModeHashMap( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case ROW_MODE_OPTIMIZED: + executeRowModeOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case VECTOR_PASS_THROUGH: + executeVectorPassThrough( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_OPTIMIZED: + executeNativeVectorOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_FAST: + executeNativeVectorFast( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + default: + throw new RuntimeException( + "Unexpected vector map join test variation"); + } } } - private boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) { - return - (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP && - mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + private void executeRowModeHashMap( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_HASH_MAP, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeRowModeOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeVectorPassThrough( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.VECTOR_PASS_THROUGH, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorFast( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_FAST, + testDesc, testData, + expectedTestRowMultiSet, + title); } private void executeTestImplementation( MapJoinTestImplementation mapJoinImplementation, - MapJoinTestDescription testDesc, MapJoinTestData testData, RowTestObjectsMultiSet expectedTestRowMultiSet) + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) throws Exception { - System.out.println("*BENCHMARK* Starting " + mapJoinImplementation + " test"); + System.out.println("*BENCHMARK* Starting implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + + " title " + title); // UNDONE: Parameterize for implementation variation? 
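+ // (Row-mode implementations are verified through an ObjectInspector-based collector and vector-mode ones through a VectorizedRowBatch collector; both feed the same RowTestObjectsMultiSet comparison below.)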
MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc); - final boolean isVectorOutput = isVectorOutput(mapJoinImplementation); + final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation); RowTestObjectsMultiSet outputTestRowMultiSet = new RowTestObjectsMultiSet(); - Operator testCollectorOperator = - (!isVectorOutput ? - new TestMultiSetCollectorOperator( - testDesc.outputObjectInspectors, outputTestRowMultiSet) : - new TestMultiSetVectorCollectorOperator( - testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet)); - - MapJoinOperator operator = + CreateMapJoinResult result = MapJoinTestConfig.createMapJoinImplementation( - mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + mapJoinImplementation, testDesc, testData, mapJoinDesc); + MapJoinOperator mapJoinOperator = result.mapJoinOperator; + MapJoinTableContainer mapJoinTableContainer = result.mapJoinTableContainer; + MapJoinTableContainerSerDe mapJoinTableContainerSerDe = result.mapJoinTableContainerSerDe; + + CountCollectorTestOperator testCollectorOperator; + if (!isVectorOutput) { + testCollectorOperator = + new TestMultiSetCollectorOperator( + testDesc.outputObjectInspectors, outputTestRowMultiSet); + } else { + VectorizationContext vOutContext = + ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext(); + testCollectorOperator = + new TestMultiSetVectorCollectorOperator( + ArrayUtils.toPrimitive(vOutContext.getProjectedColumns().toArray(new Integer[0])), + testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet); + } + + MapJoinTestConfig.connectOperators(mapJoinOperator, testCollectorOperator); + + CountCollectorTestOperator interceptTestCollectorOperator = null; + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !mapJoinDesc.isDynamicPartitionHashJoin()) { + + if (mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Not supported. + return; + } + + // Wire in FULL OUTER Intercept. + interceptTestCollectorOperator = + MapJoinTestConfig.addFullOuterIntercept( + mapJoinImplementation, testDesc, outputTestRowMultiSet, testData, + mapJoinOperator, mapJoinTableContainer, mapJoinTableContainerSerDe); + } else { + + // Invoke initializeOp methods. + mapJoinOperator.initialize( + testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables. 
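+ // (Table position 1 is the Small Table side; position 0 is the Big Table in this two-table MapJoin setup.)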
+ mapJoinOperator.setTestMapJoinTableContainer( + 1, mapJoinTableContainer, mapJoinTableContainerSerDe); + } if (!isVectorOutput) { - MapJoinTestData.driveBigTableData(testDesc, testData, operator); + MapJoinTestData.driveBigTableData(testDesc, testData, mapJoinOperator); } else { - MapJoinTestData.driveVectorBigTableData(testDesc, testData, operator); + MapJoinTestData.driveVectorBigTableData(testDesc, testData, mapJoinOperator); + } + + if (!testCollectorOperator.getIsClosed()) { + Assert.fail("collector operator not closed"); + } + if (testCollectorOperator.getIsAborted()) { + Assert.fail("collector operator aborted"); + } + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !mapJoinDesc.isDynamicPartitionHashJoin()) { + if (!interceptTestCollectorOperator.getIsClosed()) { + Assert.fail("intercept collector operator not closed"); + } + if (interceptTestCollectorOperator.getIsAborted()) { + Assert.fail("intercept collector operator aborted"); + } } System.out.println("*BENCHMARK* executeTestImplementation row count " + - ((CountCollectorTestOperator) testCollectorOperator).getRowCount()); + testCollectorOperator.getRowCount()); // Verify the output! - if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet)) { - System.out.println("*BENCHMARK* verify failed for " + mapJoinImplementation); + String option = ""; + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + option = " mapJoinPlanVariation " + testDesc.mapJoinPlanVariation.name(); + } + if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet, "expected", "actual")) { + System.out.println("*BENCHMARK* " + title + " verify failed" + + " for implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + option); + expectedTestRowMultiSet.displayDifferences(outputTestRowMultiSet, "expected", "actual"); } else { - System.out.println("*BENCHMARK* verify succeeded for " + mapJoinImplementation); + System.out.println("*BENCHMARK* " + title + " verify succeeded " + + " for implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + option); } } } \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java index 09dcb83..3ce061d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java @@ -30,11 +30,15 @@ import junit.framework.TestCase; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.WritableComparator; @@ -197,6 +201,20 @@ public long getKey(int index) { return array[index].getValues(); } + private 
void verifyOne(VectorMapJoinFastLongHashMap map, int index, MatchTracker matchTracker) { + FastLongHashMapElement element = array[index]; + long longKey = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(longKey, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastLongHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -204,18 +222,77 @@ public void verify(VectorMapJoinFastLongHashMap map) { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(long searchLong) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastLongHashMapElement element = array[index]; - long key = element.getKey(); - List values = element.getValues(); + long longKey = element.getKey(); + if (longKey == searchLong) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastLongHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean nonMatched[] = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean returnedNonMatched[] = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedLongKey(); + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + long longKey = nonMatchedIterator.getNonMatchedLongKey(); + int index = findKeyInArray(longKey); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastLongHashMapElement element = array[index]; + List values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } } @@ -247,6 +324,11 @@ public int getValueCount() { public void addValue(byte[] value) { values.add(value); } + + @Override + public String toString() { + return "Key length " + key.length + ", value count " + values.size(); + } } /* @@ -310,6 +392,21 @@ public void add(byte[] key, byte[] value) { return array[index].getValues(); } + private void 
verifyOne(VectorMapJoinFastBytesHashMap map, int index, + MatchTracker matchTracker) { + FastBytesHashMapElement element = array[index]; + byte[] key = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastBytesHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -317,18 +414,82 @@ public void verify(VectorMapJoinFastBytesHashMap map) { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(byte[] searchKeyBytes, int searchKeyOffset, int searchKeyLength) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastBytesHashMapElement element = array[index]; - byte[] key = element.getKey(); - List values = element.getValues(); + byte[] keyBytes = element.getKey(); + if (keyBytes.length == searchKeyLength && + StringExpr.equal( + keyBytes, 0, keyBytes.length, + searchKeyBytes, searchKeyOffset, searchKeyLength)) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastBytesHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean nonMatched[] = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean returnedNonMatched[] = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedBytesKey(); + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes(); + int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + int keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + int index = findKeyInArray(keyBytes, keyOffset, keyLength); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastBytesHashMapElement element = array[index]; + List values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } } diff --git 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java new file mode 100644 index 0000000..6833553 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +/* + * Tests non-matched key iteration over the multi-key value hash map optimized for + * vector map join. + * + * The key is uninterpreted bytes. + */ +public class TestVectorMapJoinFastBytesHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(82733); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Third value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(29383); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable) + throws HiveException, IOException { + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, MAX_KEY_LENGTH, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable, + int maxKeyLength, int fixedValueLength) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { + value = new byte[generateLargeCount() - 1]; + } else { + value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + byte[] key; + while (true) { + key = new byte[random.nextInt(maxKeyLength)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + } else { + byte[] randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + } + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(9332); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testReallyBig() throws Exception { + random = new Random(42662); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,LARGE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java index cbd77d1..fb8be91 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java @@ -37,7 +37,7 @@ public void testOneKey() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -65,7 +65,7 @@ public void testMultipleKeysSingleValue() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -91,7 +91,7 @@ public void testGetNonExistent() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -125,7 +125,7 @@ public void testFullMap() throws Exception { // Make sure the map does not expand; should be able to find space. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,CAPACITY, 1f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet(CAPACITY, 1f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -167,7 +167,7 @@ public void testExpand() throws Exception { // Start with capacity 1; make sure we expand on every put. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,1, 0.0000001f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet(1, 0.0000001f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -227,7 +227,7 @@ public void testMultipleKeysMultipleValue() throws Exception { // Use a large capacity that doesn't require expansion, yet. VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -242,7 +242,7 @@ public void testLargeAndExpand() throws Exception { // Use a large capacity that doesn't require expansion, yet. 
VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java new file mode 100644 index 0000000..8e53501 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorMapJoinFastLongHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(33221); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + long key = random.nextLong(); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Third value. 
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(900); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testExpand() throws Exception { + random = new Random(5227); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + for (int i = 0; i < 18; ++i) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable) + throws HiveException, IOException { + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable, int fixedValueLength) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { + value = new byte[generateLargeCount() - 1]; + } else { + value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + } + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(8); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(20); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java index bbb5da0..f64d180 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java @@ -39,7 +39,7 @@ public void testOneKey() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -66,7 +66,7 @@ public void testMultipleKeysSingleValue() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -94,7 +94,7 @@ public void testGetNonExistent() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -126,7 +126,7 @@ public void testFullMap() throws Exception { // Make sure the map does not expand; should be able to find space. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -167,7 +167,7 @@ public void testExpand() throws Exception { // Start with capacity 1; make sure we expand on every put. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); + false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -225,7 +225,7 @@ public void testMultipleKeysMultipleValue() throws Exception { // Use a large capacity that doesn't require expansion, yet. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -240,7 +240,7 @@ public void testLargeAndExpand() throws Exception { // Use a large capacity that doesn't require expansion, yet. 
VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java index c908f66..9615bf3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java @@ -599,8 +599,6 @@ public static Object deserializeReadComplexType(DeserializeRead deserializeRead, return getComplexField(deserializeRead, typeInfo); } - static int fake = 0; - private static Object getComplexField(DeserializeRead deserializeRead, TypeInfo typeInfo) throws IOException { switch (typeInfo.getCategory()) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java index 793a676..ff88841 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java @@ -23,8 +23,10 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; @@ -109,15 +111,25 @@ public static GenerateCategory generateCategoryFromPrimitiveCategory(PrimitiveCa } private GenerateCategory category; + private boolean allowNulls; public GenerateType(GenerateCategory category) { this.category = category; } + public GenerateType(GenerateCategory category, boolean allowNulls) { + this.category = category; + this.allowNulls = allowNulls; + } + public GenerateCategory getCategory() { return category; } + public boolean getAllowNulls() { + return allowNulls; + } + /* * BOOLEAN .. LONG: Min and max. 
*/ @@ -180,6 +192,7 @@ public void assignColumnVectors(VectorizedRowBatch batch, int columnNum, case SHORT: case INT: case LONG: + case DATE: colVector = new LongColumnVector(); break; @@ -189,16 +202,22 @@ public void assignColumnVectors(VectorizedRowBatch batch, int columnNum, break; case STRING: + case CHAR: + case VARCHAR: + case BINARY: colVector = new BytesColumnVector(); break; - // UNDONE - case DATE: case TIMESTAMP: - case BINARY: + colVector = new TimestampColumnVector(); + break; + case DECIMAL: - case VARCHAR: - case CHAR: + colVector = new DecimalColumnVector(38, 18); + break; + + // UNDONE + case LIST: case MAP: case STRUCT: diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java index 9bf9d9d..65539bd 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java @@ -22,21 +22,30 @@ import java.util.Arrays; import java.util.Random; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.RandomTypeUtil; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.Text; public class VectorColumnGroupGenerator { private GenerateType[] generateTypes; private int[] columnNums; private Object[] arrays; + private boolean[][] isNullArrays; public VectorColumnGroupGenerator(int columnNum, GenerateType generateType) { columnNums = new int[] {columnNum}; @@ -59,6 +68,7 @@ public VectorColumnGroupGenerator(int startColumnNum, GenerateType[] generateTyp private void allocateArrays(int size) { arrays = new Object[generateTypes.length]; + isNullArrays = new boolean[generateTypes.length][]; for (int i = 0; i < generateTypes.length; i++) { GenerateType generateType = generateTypes[i]; GenerateCategory category = generateType.getCategory(); @@ -88,24 +98,34 @@ private void allocateArrays(int size) { case STRING: array = new String[size]; break; + case BINARY: + array = new byte[size][]; + break; + case DATE: + array = new Date[size]; + break; case TIMESTAMP: array = new Timestamp[size]; break; - - // UNDONE - case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: case CHAR: + array = new HiveChar[size]; + break; + case VARCHAR: + array = new HiveVarchar[size]; + break; + case DECIMAL: + array = new HiveDecimalWritable[size]; + break; case LIST: case MAP: case STRUCT: case UNION: default: + throw new RuntimeException("Unexpected generate category " + category); } arrays[i] = array; + 
isNullArrays[i] = new boolean[size]; } } @@ -139,16 +159,24 @@ public void clearColumnValueArrays() { case STRING: Arrays.fill(((String[]) array), null); break; + case BINARY: + Arrays.fill(((byte[][]) array), null); + break; + case DATE: + Arrays.fill(((Date[]) array), null); + break; case TIMESTAMP: Arrays.fill(((Timestamp[]) array), null); break; - - // UNDONE - case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: case CHAR: + Arrays.fill(((HiveChar[]) array), null); + break; + case VARCHAR: + Arrays.fill(((HiveVarchar[]) array), null); + break; + case DECIMAL: + Arrays.fill(((HiveDecimalWritable[]) array), null); + break; case LIST: case MAP: @@ -168,6 +196,11 @@ public void generateRowValues(int rowIndex, Random random) { private void generateRowColumnValue(int rowIndex, int columnIndex, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + if (allowNulls && random.nextInt(100) < 5) { + isNullArrays[columnIndex][rowIndex] = true; + return; + } Object array = arrays[columnIndex]; switch (category) { case BOOLEAN: @@ -228,6 +261,20 @@ private void generateRowColumnValue(int rowIndex, int columnIndex, Random random } break; + case BINARY: + { + byte[] value = RandomTypeUtil.getRandBinary(random, 10); + ((byte[][]) array)[rowIndex] = value; + } + break; + + case DATE: + { + Date value = RandomTypeUtil.getRandDate(random); + ((Date[]) array)[rowIndex] = value; + } + break; + case TIMESTAMP: { Timestamp value = RandomTypeUtil.getRandTimestamp(random).toSqlTimestamp(); @@ -235,14 +282,31 @@ private void generateRowColumnValue(int rowIndex, int columnIndex, Random random } break; - // UNDONE - case DATE: - // UNDONE: Needed to longTest? 
+ case CHAR: + { + // UNDONE: Use CharTypeInfo.maxLength + HiveChar value = + new HiveChar(RandomTypeUtil.getRandString(random), 10); + ((HiveChar[]) array)[rowIndex] = value; + } + break; - case BINARY: - case DECIMAL: case VARCHAR: - case CHAR: + { + // UNDONE: Use VarcharTypeInfo.maxLength + HiveVarchar value = + new HiveVarchar(RandomTypeUtil.getRandString(random), 10); + ((HiveVarchar[]) array)[rowIndex] = value; + } + break; + + case DECIMAL: + { + HiveDecimalWritable value = + new HiveDecimalWritable(RandomTypeUtil.getRandHiveDecimal(random)); + ((HiveDecimalWritable[]) array)[rowIndex] = value; + } + break; case LIST: case MAP: @@ -261,7 +325,15 @@ public void fillDownRowValues(int rowIndex, int seriesCount, Random random) { private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCount, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); Object array = arrays[columnIndex]; + boolean[] isNull = isNullArrays[columnIndex]; + if (allowNulls && isNull[rowIndex]) { + for (int i = 1; i < seriesCount; i++) { + isNull[rowIndex + i] = true; + } + return; + } switch (category) { case BOOLEAN: { @@ -335,6 +407,24 @@ private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCou } } break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + byte[] value = byteArrayArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + byteArrayArray[rowIndex + i] = value; + } + } + break; + case DATE: + { + Date[] dateArray = ((Date[]) array); + Date value = dateArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + dateArray[rowIndex + i] = value; + } + } + break; case TIMESTAMP: { Timestamp[] timestampArray = ((Timestamp[]) array); @@ -344,14 +434,33 @@ private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCou } } break; - - // UNDONE - case DATE: - - case BINARY: - case DECIMAL: - case VARCHAR: case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + HiveChar value = hiveCharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveCharArray[rowIndex + i] = value; + } + } + break; + case VARCHAR: + { + HiveVarchar[] hiveVarcharArray = ((HiveVarchar[]) array); + HiveVarchar value = hiveVarcharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveVarcharArray[rowIndex + i] = value; + } + } + break; + case DECIMAL: + { + HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array); + HiveDecimalWritable value = hiveDecimalWritableArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveDecimalWritableArray[rowIndex + i] = value; + } + } + break; case LIST: case MAP: @@ -387,6 +496,16 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde GenerateType generateType = generateTypes[logicalColumnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + boolean[] isNull = isNullArrays[logicalColumnIndex]; + if (allowNulls) { + for (int i = 0; i < size; i++) { + if (isNull[i]) { + colVector.isNull[i] = true; + colVector.noNulls = false; + } + } + } Object array = arrays[logicalColumnIndex]; switch (category) { case BOOLEAN: @@ -394,7 +513,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde boolean[] booleanArray = ((boolean[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { 
- vector[i] = (booleanArray[i] ? 1 : 0); + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = (booleanArray[i] ? 1 : 0); + } } } break; @@ -403,7 +526,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde byte[] byteArray = ((byte[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = byteArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = byteArray[i]; + } } } break; @@ -412,7 +539,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde short[] shortArray = ((short[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = shortArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = shortArray[i]; + } } } break; @@ -421,7 +552,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde int[] intArray = ((int[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = intArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = intArray[i]; + } } } break; @@ -430,7 +565,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde long[] longArray = ((long[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = longArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = longArray[i]; + } } } break; @@ -439,7 +578,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde float[] floatArray = ((float[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = floatArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = floatArray[i]; + } } } break; @@ -448,7 +591,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde double[] doubleArray = ((double[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = doubleArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = doubleArray[i]; + } } } break; @@ -457,8 +604,35 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde String[] stringArray = ((String[]) array); BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); for (int i = 0; i < size; i++) { - byte[] bytes = stringArray[i].getBytes(); - bytesColVec.setVal(i, bytes); + if (!isNull[i]) { + byte[] bytes = stringArray[i].getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = byteArrayArray[i]; + bytesColVec.setVal(i, bytes); + } + } + } + break; + case DATE: + { + Date[] dateArray = ((Date[]) array); + LongColumnVector longColVec = ((LongColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + Date date = dateArray[i]; + longColVec.vector[i] = + DateWritableV2.dateToDays(date); + } } } break; @@ -467,26 +641,58 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde Timestamp[] timestampArray = ((Timestamp[]) array); TimestampColumnVector timestampColVec = ((TimestampColumnVector) colVector); for (int i = 0; i < size; i++) { - Timestamp timestamp = timestampArray[i]; - 
timestampColVec.set(i, timestamp); + if (!isNull[i]) { + Timestamp timestamp = timestampArray[i]; + timestampColVec.set(i, timestamp); + } + } + } + break; + case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = hiveCharArray[i].getValue().getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case VARCHAR: + { + HiveVarchar[] hiveVarcharArray = ((HiveVarchar[]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = hiveVarcharArray[i].getValue().getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case DECIMAL: + { + HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array); + DecimalColumnVector decimalColVec = ((DecimalColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + HiveDecimalWritable decWritable = hiveDecimalWritableArray[i]; + decimalColVec.set(i, decWritable); + } } } break; // UNDONE - case DATE: - - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: - case LIST: case MAP: case STRUCT: case UNION: default: + throw new RuntimeException("Unexpected generate category " + category); } } } \ No newline at end of file diff --git ql/src/test/queries/clientnegative/join32.q ql/src/test/queries/clientnegative/join32.q index 8c93914..3b6babb 100644 --- ql/src/test/queries/clientnegative/join32.q +++ ql/src/test/queries/clientnegative/join32.q @@ -2,6 +2,9 @@ --! qt:dataset:src1 --! qt:dataset:src set hive.cbo.enable=false; + +-- SORT_QUERY_RESULTS + CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; -- Mapjoin followed by Mapjoin is not supported. 
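The VectorColumnGroupGenerator changes above thread null support through one idiom: generateRowColumnValue marks roughly 5% of rows as null in isNullArrays instead of producing a value, fillDownRowColumnValue propagates that flag down a repeated series, and populateBatchColumn transfers it to the column vector's isNull while clearing noNulls (writing a dummy 0 into primitive slots). A minimal standalone sketch of that idiom for a long column; the class name, NULL_PERCENT constant, and fillLongColumn helper are illustrative only, not part of the patch:

import java.util.Random;

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class NullInjectionSketch {

  // Illustrative constant; the patch hard-codes random.nextInt(100) < 5.
  private static final int NULL_PERCENT = 5;

  // Fill a long column with random values, marking ~NULL_PERCENT of slots null.
  public static void fillLongColumn(LongColumnVector colVector, int size, Random random) {
    for (int i = 0; i < size; i++) {
      if (random.nextInt(100) < NULL_PERCENT) {
        // Mark the slot null; clearing noNulls tells readers to consult isNull.
        colVector.isNull[i] = true;
        colVector.noNulls = false;
        colVector.vector[i] = 0;  // dummy value, never read while isNull[i] is true
      } else {
        colVector.vector[i] = random.nextLong();
      }
    }
  }
}

Consumers of a VectorizedRowBatch only consult isNull when noNulls is false, which is why the patch can leave dummy zeros (or skip setVal entirely for bytes columns) in null slots.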
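Taken together, the verifyNonMatched methods added to CheckFastHashTable above exercise the hash-table side of the new FULL OUTER MapJoin: every lookup is recorded in a MatchTracker, and a VectorMapJoinNonMatchedIterator afterwards walks the small-table keys that no probe ever matched. A minimal sketch of that call sequence for the long-key map, using only methods visible in this patch; the drive method name and probeKeys parameter are illustrative, not part of the patch:

import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class NonMatchedIterationSketch {

  public static void drive(VectorMapJoinFastLongHashMap map, long[] probeKeys)
      throws HiveException {

    // Probe phase: each lookup records any MATCH in the tracker.
    MatchTracker matchTracker = map.createMatchTracker();
    for (long probeKey : probeKeys) {
      VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
      JoinUtil.JoinResult joinResult = map.lookup(probeKey, hashMapResult, matchTracker);
      if (joinResult == JoinUtil.JoinResult.MATCH) {
        // A real join would emit the joined rows from hashMapResult here.
      }
    }

    // Non-matched phase: walk the small-table keys no probe ever hit, so a
    // FULL OUTER join can emit them with NULLs on the big-table side.
    VectorMapJoinNonMatchedIterator nonMatchedIterator =
        map.createNonMatchedIterator(matchTracker);
    nonMatchedIterator.init();
    while (nonMatchedIterator.findNextNonMatched()) {
      if (nonMatchedIterator.readNonMatchedLongKey()) {
        long key = nonMatchedIterator.getNonMatchedLongKey();
        VectorMapJoinHashMapResult values =
            nonMatchedIterator.getNonMatchedHashMapResult();
        // ... emit (key, values) rows padded with NULL big-table columns ...
      }
    }
  }
}

The tests above randomly withhold about half the keys from the probe phase and then assert that the iterator returns exactly that set, each key exactly once, with its full value list intact.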
diff --git ql/src/test/queries/clientpositive/auto_join_filters.q ql/src/test/queries/clientpositive/auto_join_filters.q index ea028f6..9282f8f 100644 --- ql/src/test/queries/clientpositive/auto_join_filters.q +++ ql/src/test/queries/clientpositive/auto_join_filters.q @@ -7,7 +7,13 @@ LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_n5; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -24,10 +30,19 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOI SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND 
b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); @@ -51,7 +66,12 @@ SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = 
b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -68,10 +88,19 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOI SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND 
b.value > 50 AND b.key = b.value); diff --git ql/src/test/queries/clientpositive/auto_join_nulls.q ql/src/test/queries/clientpositive/auto_join_nulls.q index 4a2b57b..a385128 100644 --- ql/src/test/queries/clientpositive/auto_join_nulls.q +++ ql/src/test/queries/clientpositive/auto_join_nulls.q @@ -19,10 +19,19 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a RIGHT OUTER JOI SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a RIGHT OUTER JOIN myinput1_n2 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a RIGHT OUTER JOIN myinput1_n2 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a RIGHT OUTER JOIN myinput1_n2 b ON a.key=b.key and a.value = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value and a.key=b.key; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.key; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value and a.key=b.key; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n2 a LEFT OUTER JOIN myinput1_n2 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1_n2 c ON (b.value=c.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n2 a RIGHT OUTER JOIN myinput1_n2 b ON (a.value=b.value) LEFT OUTER JOIN myinput1_n2 c ON (b.value=c.value); diff --git ql/src/test/queries/clientpositive/bucket_map_join_tez1.q ql/src/test/queries/clientpositive/bucket_map_join_tez1.q index 8248035..049a4d9 100644 --- ql/src/test/queries/clientpositive/bucket_map_join_tez1.q +++ ql/src/test/queries/clientpositive/bucket_map_join_tez1.q @@ -18,7 +18,7 @@ load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapj load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08'); load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08'); - +-- SORT_QUERY_RESULTS set hive.optimize.bucketingsorting=false; insert overwrite table tab_part_n9 partition (ds='2008-04-08') diff --git ql/src/test/queries/clientpositive/correlationoptimizer1.q ql/src/test/queries/clientpositive/correlationoptimizer1.q index 1c4f82a..d61d175 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer1.q +++ ql/src/test/queries/clientpositive/correlationoptimizer1.q @@ -216,6 +216,7 @@ set hive.optimize.correlation=false; -- they share the same key. 
Because those keys with a null value are not grouped -- in the output of the Full Outer Join, we cannot use a single MR to execute -- these two operators. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt @@ -227,7 +228,35 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt @@ -238,6 +267,7 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp; +SET hive.merge.nway.joins=true; set hive.auto.convert.join=false; set hive.optimize.correlation=false; diff --git ql/src/test/queries/clientpositive/correlationoptimizer2.q ql/src/test/queries/clientpositive/correlationoptimizer2.q index 6656084..cbb6e47 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer2.q +++ ql/src/test/queries/clientpositive/correlationoptimizer2.q @@ -96,6 +96,7 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 set hive.optimize.correlation=false; -- Full Outer Join should be handled. 
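-- The q-file updates in this patch follow a common pattern for every FULL OUTER JOIN
-- query: run it once with hive.mapjoin.full.outer=false to keep the original
-- reduce-side join plan as the baseline, then force the new FULL OUTER MapJoin and
-- EXPLAIN it, with hive.merge.nway.joins disabled (presumably so the FULL OUTER join
-- is converted on its own rather than merged into an n-way join), e.g. with
-- illustrative tables t1 and t2:
--
--   SET hive.mapjoin.full.outer=false;
--   SELECT ... FROM t1 FULL OUTER JOIN t2 ON ...;   -- baseline plan
--   SET hive.mapjoin.full.outer=true;
--   SET hive.merge.nway.joins=false;
--   EXPLAIN SELECT ... FROM t1 FULL OUTER JOIN t2 ON ...;
--   SELECT ... FROM t1 FULL OUTER JOIN t2 ON ...;   -- FULL OUTER MapJoin plan
--   SET hive.merge.nway.joins=true;                 -- restore the default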
+SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -109,7 +110,39 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -122,8 +155,28 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; set hive.optimize.correlation=false; + +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=true; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT a.key AS key, count(1) AS cnt @@ -138,11 +191,13 @@ FROM (SELECT a.key AS key, count(1) AS cnt FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON 
(a.key = b.key) GROUP BY a.key) tmp; +SET hive.merge.nway.joins=false; set hive.optimize.correlation=true; -- After FULL OUTER JOIN, keys with null values are not grouped, right now, -- we have to generate 2 MR jobs for tmp, 1 MR job for a join b and another for the -- GroupByOperator on key. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT a.key AS key, count(1) AS cnt @@ -158,11 +213,30 @@ FROM (SELECT a.key AS key, count(1) AS cnt ON (a.key = b.key) GROUP BY a.key) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=false; -- When Correlation Optimizer is turned off, we need 4 MR jobs. -- When Correlation Optimizer is turned on, the subquery of tmp will be evaluated in -- a single MR job (including the subquery a, the subquery b, and a join b). So, we -- will have 2 MR jobs. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -176,7 +250,39 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SET 
hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -189,3 +295,4 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; diff --git ql/src/test/queries/clientpositive/correlationoptimizer4.q ql/src/test/queries/clientpositive/correlationoptimizer4.q index c34ff23..eab11c1 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer4.q +++ ql/src/test/queries/clientpositive/correlationoptimizer4.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; + CREATE TABLE T1_n146(key INT, val STRING); LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n146; CREATE TABLE T2_n86(key INT, val STRING); @@ -24,7 +25,7 @@ FROM (SELECT y.key AS key, count(1) AS cnt GROUP BY y.key) tmp; set hive.optimize.correlation=true; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x JOIN T1_n146 y ON (x.key = y.key) JOIN T3_n34 z ON (y.key = z.key) @@ -38,7 +39,7 @@ FROM (SELECT y.key AS key, count(1) AS cnt set hive.optimize.correlation=true; set hive.auto.convert.join=true; -- Enable hive.auto.convert.join. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x JOIN T1_n146 y ON (x.key = y.key) JOIN T3_n34 z ON (y.key = z.key) @@ -53,7 +54,7 @@ set hive.auto.convert.join=false; set hive.optimize.correlation=false; -- This case should be optimized, since the key of GroupByOperator is from the leftmost table -- of a chain of LEFT OUTER JOINs. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -65,7 +66,7 @@ FROM (SELECT x.key AS key, count(1) AS cnt GROUP BY x.key) tmp; set hive.optimize.correlation=true; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -80,7 +81,7 @@ set hive.optimize.correlation=true; -- This query will not be optimized by correlation optimizer because -- GroupByOperator uses y.key (a right table of a left outer join) -- as the key. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -94,7 +95,7 @@ FROM (SELECT y.key AS key, count(1) AS cnt set hive.optimize.correlation=false; -- This case should be optimized, since the key of GroupByOperator is from the rightmost table -- of a chain of RIGHT OUTER JOINs. 
-EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT z.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -106,7 +107,7 @@ FROM (SELECT z.key AS key, count(1) AS cnt GROUP BY z.key) tmp; set hive.optimize.correlation=true; -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT z.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -121,7 +122,7 @@ set hive.optimize.correlation=true; -- This query will not be optimized by correlation optimizer because -- GroupByOperator uses y.key (a left table of a right outer join) -- as the key. -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -135,7 +136,20 @@ FROM (SELECT y.key AS key, count(1) AS cnt set hive.optimize.correlation=false; -- This case should not be optimized because after the FULL OUTER JOIN, rows with null keys -- are not grouped. -EXPLAIN +set hive.auto.convert.join=false; +EXPLAIN VECTORIZATION DETAIL +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +set hive.auto.convert.join=true; +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -147,7 +161,8 @@ FROM (SELECT y.key AS key, count(1) AS cnt GROUP BY y.key) tmp; set hive.optimize.correlation=true; -EXPLAIN +set hive.auto.convert.join=false; +EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -157,3 +172,15 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp; + +set hive.auto.convert.join=true; +EXPLAIN VECTORIZATION DETAIL +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..32b2e04 --- /dev/null +++ ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q @@ -0,0 +1,290 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set 
hive.vectorized.execution.enabled=false; +set hive.vectorized.execution.mapjoin.native.enabled=false; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER MapJoin variation for OPTIMIZED hash table implementation (row-mode; vectorization is disabled above). +------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. 
+SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + 
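+-- The *_nonull table variants in this file exist because a NULL join key never
+-- compares equal to anything, not even another NULL: in a FULL OUTER JOIN every
+-- NULL-keyed row comes back unmatched, with NULLs on the opposite side. A
+-- minimal sketch of that behavior, assuming two illustrative throwaway tables
+-- (the demo table names are not part of the test data above):
+CREATE TABLE fullouter_null_key_demo_a(key int);
+CREATE TABLE fullouter_null_key_demo_b(key int);
+INSERT INTO fullouter_null_key_demo_a VALUES (1), (NULL);
+INSERT INTO fullouter_null_key_demo_b VALUES (1), (NULL);
+-- Expected: (1,1) plus one (NULL,NULL) row per unmatched NULL key, 3 rows in all.
+SELECT a.key, b.key FROM fullouter_null_key_demo_a a FULL OUTER JOIN fullouter_null_key_demo_b b ON a.key = b.key;
+DROP TABLE fullouter_null_key_demo_a;
+DROP TABLE fullouter_null_key_demo_b;
+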
+CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, 
s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; +analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + diff --git ql/src/test/queries/clientpositive/join32_lessSize.q ql/src/test/queries/clientpositive/join32_lessSize.q index fcadbe3..b998ac7 100644 --- ql/src/test/queries/clientpositive/join32_lessSize.q +++ ql/src/test/queries/clientpositive/join32_lessSize.q @@ -2,6 +2,7 @@ --! qt:dataset:src1 --! qt:dataset:src set hive.mapred.mode=nonstrict; + -- SORT_QUERY_RESULTS CREATE TABLE dest_j1_n21(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/join33.q ql/src/test/queries/clientpositive/join33.q index 1527575..6ddf0eb 100644 --- ql/src/test/queries/clientpositive/join33.q +++ ql/src/test/queries/clientpositive/join33.q @@ -2,6 +2,7 @@ --! qt:dataset:src1 --! 
qt:dataset:src set hive.mapred.mode=nonstrict; + -- SORT_QUERY_RESULTS CREATE TABLE dest_j1_n7(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/mapjoin2.q ql/src/test/queries/clientpositive/mapjoin2.q index e194bd0..014dabe 100644 --- ql/src/test/queries/clientpositive/mapjoin2.q +++ ql/src/test/queries/clientpositive/mapjoin2.q @@ -6,16 +6,30 @@ create table tbl_n1 (n bigint, t string); insert into tbl_n1 values (1, 'one'); insert into tbl_n1 values(2, 'two'); +explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; diff --git ql/src/test/queries/clientpositive/mapjoin46.q ql/src/test/queries/clientpositive/mapjoin46.q index 9de7113..81f9610 100644 --- ql/src/test/queries/clientpositive/mapjoin46.q +++ ql/src/test/queries/clientpositive/mapjoin46.q @@ -3,6 +3,8 @@ set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; set hive.join.emit.interval=2; +-- SORT_QUERY_RESULTS + CREATE TABLE test1_n4 (key INT, value INT, col_1 STRING); INSERT 
INTO test1_n4 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); @@ -173,6 +175,22 @@ ON (test1_n4.value=test2_n2.value OR test2_n2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -185,8 +203,23 @@ FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value OR test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and left input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -197,8 +230,23 @@ SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value OR test1_n4.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and right input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -209,8 +257,25 @@ SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value OR test2_n2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Keys plus residual (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -223,8 +288,51 @@ FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value AND (test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102)); +SET hive.merge.nway.joins=true; -- Mixed ( FOJ (ROJ, LOJ) ) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT 
test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM ( @@ -263,3 +371,4 @@ FULL OUTER JOIN ( OR test2_n2.key between 100 and 102)) ) sq2 ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); +SET hive.merge.nway.joins=true; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q index 0500a62..10e982e 100644 --- ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q +++ ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q @@ -24,3 +24,9 @@ select a.* from alltypesorc a left outer join src b on a.cint = cast(b.key as int) and (a.cint < 100) limit 1; + +explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1; diff --git ql/src/test/queries/clientpositive/union14.q ql/src/test/queries/clientpositive/union14.q index 34f73cd..f56e53d 100644 --- ql/src/test/queries/clientpositive/union14.q +++ ql/src/test/queries/clientpositive/union14.q @@ -2,7 +2,9 @@ --! qt:dataset:src set hive.mapred.mode=nonstrict; set hive.map.aggr = true; --- SORT_BEFORE_DIFF + +-- SORT_QUERY_RESULTS + -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink explain diff --git ql/src/test/queries/clientpositive/union7.q ql/src/test/queries/clientpositive/union7.q index 4d3eed5..8668be0 100644 --- ql/src/test/queries/clientpositive/union7.q +++ ql/src/test/queries/clientpositive/union7.q @@ -4,7 +4,8 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.map.aggr = true; --- SORT_BEFORE_DIFF +-- SORT_QUERY_RESULTS + -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink explain diff --git ql/src/test/queries/clientpositive/union_null.q ql/src/test/queries/clientpositive/union_null.q index 49a0e08..44d148f 100644 --- ql/src/test/queries/clientpositive/union_null.q +++ ql/src/test/queries/clientpositive/union_null.q @@ -1,6 +1,7 @@ --! qt:dataset:src1 --! 
qt:dataset:src --- SORT_BEFORE_DIFF + +-- SORT_QUERY_RESULTS -- HIVE-2901 select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select cast(NULL as string) as x from src limit 5)b )a; diff --git ql/src/test/queries/clientpositive/union_view.q ql/src/test/queries/clientpositive/union_view.q index 186cb02..a378707 100644 --- ql/src/test/queries/clientpositive/union_view.q +++ ql/src/test/queries/clientpositive/union_view.q @@ -3,6 +3,8 @@ set hive.mapred.mode=nonstrict; set hive.stats.dbclass=fs; set hive.explain.user=false; +-- SORT_QUERY_RESULTS + CREATE TABLE src_union_1_n0 (key int, value string) PARTITIONED BY (ds string); CREATE TABLE src_union_2_n0 (key int, value string) PARTITIONED BY (ds string, part_1 string); diff --git ql/src/test/queries/clientpositive/vector_full_outer_join.q ql/src/test/queries/clientpositive/vector_full_outer_join.q new file mode 100644 index 0000000..cc77488 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_full_outer_join.q @@ -0,0 +1,82 @@ +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +-- SORT_QUERY_RESULTS + +drop table if exists TJOIN1; +drop table if exists TJOIN2; +create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc; +create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc; +create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE; +LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE; +INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE; +INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; + +SET hive.mapjoin.full.outer=true; + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set 
hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +-- Omit tjoin2.c1 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +-- Omit tjoin2.c1 and tjoin2.c2 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q new file mode 100644 index 0000000..1685f35 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q @@ -0,0 +1,290 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for FAST hash table implementation. +------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. 
+set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; 
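+-- The analyze statements in this block publish numRows/rawDataSize to the
+-- metastore; together with hive.auto.convert.join.noconditionaltask.size they
+-- determine which side of the join is small enough to become the MapJoin hash
+-- table. As a quick sketch, the statistics the planner will see for a small
+-- side can be inspected with:
+DESCRIBE FORMATTED fullouter_string_small_1a;
+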
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..8b59266 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q @@ -0,0 +1,290 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for OPTIMIZED hash table implementation. +------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. 
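+-- NOTE: (Presumably the mechanics: hive.auto.convert.join.noconditionaltask.size is the byte +-- NOTE: threshold under which a small table may be broadcast into a shared-memory hash table; +-- NOTE: at 500 bytes none of the small tables below qualifies, so with +-- NOTE: hive.optimize.dynamic.partition.hashjoin=true the joins should be planned as +-- NOTE: reduce-side MapJoins instead. The tiny hive.exec.reducers.bytes.per.reducer value +-- NOTE: likewise forces multiple reducers, so the dynamic partitioning path is exercised.)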
+set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; 
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q new file mode 100644 index 0000000..869668e --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q @@ -0,0 +1,290 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.mapjoin.native.enabled=false; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized PASS-THRU Mode MapJoin variation for OPTIMIZED hash table implementation. +------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead.
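+-- NOTE: (The PASS-THRU part, presumably: with hive.vectorized.execution.mapjoin.native.enabled=false +-- NOTE: the plan stays vectorized but the MapJoin itself runs through the row-mode operator via the +-- NOTE: pass-through VectorMapJoinOperator, so the EXPLAIN output below should show that operator +-- NOTE: rather than the native VectorMapJoin* variants used in the companion _optimized test.)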
+set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; 
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + diff --git ql/src/test/queries/clientpositive/vector_join30.q ql/src/test/queries/clientpositive/vector_join30.q index 9672a47..74c4433 100644 --- ql/src/test/queries/clientpositive/vector_join30.q +++ ql/src/test/queries/clientpositive/vector_join30.q @@ -11,7 +11,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000; CREATE TABLE orcsrc_n0 STORED AS ORC AS SELECT * FROM src; -explain vectorization expression +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -19,14 +19,14 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); -explain vectorization expression +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -34,116 +34,238 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- FULL OUTER JOIN +-- 
(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) +JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +----------------- + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN +JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select 
sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN @@ -151,12 +273,33 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); + +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); diff --git ql/src/test/queries/clientpositive/vector_join_filters.q 
ql/src/test/queries/clientpositive/vector_join_filters.q index 88458f8..b9f3740 100644 --- ql/src/test/queries/clientpositive/vector_join_filters.q +++ ql/src/test/queries/clientpositive/vector_join_filters.q @@ -14,7 +14,10 @@ CREATE TABLE myinput1_n1 STORED AS ORC AS SELECT * FROM myinput1_txt_n0; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a LEFT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -26,10 +29,21 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a LEFT OUTER JOIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a LEFT OUTER JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a LEFT OUTER JOIN myinput1_n1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION 
OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n1 a LEFT OUTER JOIN myinput1_n1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1_n1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); diff --git ql/src/test/queries/clientpositive/vector_join_nulls.q ql/src/test/queries/clientpositive/vector_join_nulls.q index 3e8df9a..f87dc44 100644 --- ql/src/test/queries/clientpositive/vector_join_nulls.q +++ ql/src/test/queries/clientpositive/vector_join_nulls.q @@ -14,7 +14,11 @@ CREATE TABLE myinput1_n4 STORED AS ORC AS SELECT * FROM myinput1_txt_n1; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a JOIN myinput1_n4 b; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a LEFT OUTER JOIN myinput1_n4 b; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b; + SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a JOIN myinput1_n4 b ON a.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a JOIN myinput1_n4 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a JOIN myinput1_n4 b ON a.value = b.value; @@ -23,9 +27,21 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a LEFT OUTER JOIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a LEFT OUTER JOIN myinput1_n4 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a LEFT OUTER JOIN myinput1_n4 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a LEFT OUTER JOIN myinput1_n4 b ON a.key = b.key and a.value=b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.key; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.key; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.value = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a 
RIGHT OUTER JOIN myinput1_n4 b ON a.value = b.value; + +EXPLAIN VECTORIZATION OPERATOR +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key=b.key and a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key=b.key and a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n4 a LEFT OUTER JOIN myinput1_n4 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1_n4 c ON (b.value=c.value); diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q index 3e5ec7e..84f656b 100644 --- ql/src/test/queries/clientpositive/vector_left_outer_join2.q +++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q @@ -1,3 +1,4 @@ +set hive.cli.print.header=true; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.fetch.task.conversion=none; @@ -20,14 +21,14 @@ INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=true; -explain +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -36,7 +37,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -44,7 +45,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -52,7 +53,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 
); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -60,7 +61,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q index 6ecfa1a..f9b4222 100644 --- ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q +++ ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q @@ -4,6 +4,7 @@ set hive.fetch.task.conversion=none; set hive.auto.convert.join=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=10000; +SET hive.merge.nway.joins=false; -- SORT_QUERY_RESULTS @@ -27,167 +28,190 @@ select * from t4_n19; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=false; -explain vectorization only summary - +explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization only summary +explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization only summary +explain vectorization expression select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization only summary +explain vectorization expression select * from t2_n87 a left semi 
join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; -explain vectorization only summary +explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization only summary +explain vectorization expression select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization only summary +SET hive.mapjoin.full.outer=false; +explain vectorization expression select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization only summary +SET hive.mapjoin.full.outer=true; +explain vectorization expression +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; + +-- Verify this works (FULL OUTER MapJoin is not enabled for N-way) +SET hive.merge.nway.joins=true; +explain vectorization expression +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; +SET hive.merge.nway.joins=false; + +explain vectorization expression select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; -explain vectorization only summary -select a.key from t3_n35 a left semi join t2_n87 b on 
a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization expression +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization only summary +SET hive.mapjoin.full.outer=true; +explain vectorization expression select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization only summary +explain vectorization expression select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; -explain vectorization only summary +explain vectorization expression select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=true; +set hive.llap.enable.grace.join.in.llap=true; -explain vectorization summary +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization summary +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization summary +explain vectorization operator select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; 
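The recurring pattern in these hunks: the same FULL OUTER query is explained once with hive.mapjoin.full.outer=false (pinning the reduce-side Full Outer Join plan) and once with it set to true (allowing a FULL OUTER MapJoin), so the golden file captures both plans; the N-way case above is expected to fall back, since FULL OUTER MapJoin is not enabled for N-way joins. A minimal HiveQL sketch of the toggle, assuming a hypothetical table probe_t(key string):

-- Baseline plan: reduce-side Full Outer Join.
SET hive.mapjoin.full.outer=false;
EXPLAIN VECTORIZATION OPERATOR
SELECT a.key FROM probe_t a FULL OUTER JOIN probe_t b ON a.key = b.key;

-- Feature plan: FULL OUTER MapJoin becomes eligible.
SET hive.mapjoin.full.outer=true;
EXPLAIN VECTORIZATION OPERATOR
SELECT a.key FROM probe_t a FULL OUTER JOIN probe_t b ON a.key = b.key;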
-explain vectorization summary +explain vectorization operator select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; -explain vectorization summary +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization summary +explain vectorization operator select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization summary +SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization summary +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; + +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; -explain vectorization summary -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; - -explain vectorization summary +SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join 
t2_n87 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization summary +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; +-- select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; + +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; -explain vectorization summary +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; @@ -251,21 +275,29 @@ explain vectorization only operator select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization only operator +SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; +-- select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; + explain vectorization only operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; explain vectorization only operator -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=true; explain vectorization only operator select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; +-- select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; explain vectorization only operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; @@ -277,85 
+309,94 @@ select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a. set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; +set hive.llap.enable.grace.join.in.llap=true; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization detail +explain vectorization operator select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization detail +explain vectorization operator select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3_n35 a 
left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; + +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; -explain vectorization detail -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization operator +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; @@ -363,166 +404,183 @@ set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on 
a.key=b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization detail +explain vectorization operator select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization detail +explain vectorization operator select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join 
t2_n87 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=false; +explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; + +SET hive.mapjoin.full.outer=true; +explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; -explain vectorization detail -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization operator +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; +set hive.llap.enable.grace.join.in.llap=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value; -explain vectorization 
detail +explain vectorization operator select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization detail +explain vectorization operator select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization detail +explain vectorization operator select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key; -explain vectorization detail +explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization detail 
+SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key; + +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key; -explain vectorization detail -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization operator +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100; diff --git ql/src/test/queries/clientpositive/vector_nullsafe_join.q ql/src/test/queries/clientpositive/vector_nullsafe_join.q index 6a7ff72..2d7155e 100644 --- ql/src/test/queries/clientpositive/vector_nullsafe_join.q +++ ql/src/test/queries/clientpositive/vector_nullsafe_join.q @@ -38,7 +38,11 @@ select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key -- outer joins SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; +-- SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; + SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value; -- map joins @@ -65,7 +69,11 @@ select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key -- outer joins SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; +-- SELECT * FROM myinput1 a RIGHT 
OUTER JOIN myinput1 b ON a.key<=>b.value; + SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value; -- map joins diff --git ql/src/test/queries/clientpositive/vectorized_join46.q ql/src/test/queries/clientpositive/vectorized_join46.q index 145bc02..1a09398 100644 --- ql/src/test/queries/clientpositive/vectorized_join46.q +++ ql/src/test/queries/clientpositive/vectorized_join46.q @@ -1,3 +1,4 @@ +set hive.cli.print.header=true; set hive.vectorized.execution.enabled=true; set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; @@ -15,7 +16,7 @@ INSERT INTO test2_n9 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), -- Basic outer join -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value); @@ -25,7 +26,7 @@ FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value); -- Conjunction with pred on multiple inputs and single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -39,7 +40,7 @@ ON (test1_n14.value=test2_n9.value AND test2_n9.key between 100 and 102); -- Conjunction with pred on single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.key between 100 and 102 @@ -51,7 +52,7 @@ ON (test1_n14.key between 100 and 102 AND test2_n9.key between 100 and 102); -- Conjunction with pred on multiple inputs and none (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND true); @@ -61,7 +62,7 @@ FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND true); -- Condition on one input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.key between 100 and 102); @@ -71,7 +72,7 @@ FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.key between 100 and 102); -- Disjunction with pred on multiple inputs and single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -85,7 +86,7 @@ ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -97,7 +98,7 @@ ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -109,7 +110,7 @@ ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102); -- Keys plus residual (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -123,7 +124,7 @@ ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -137,7 +138,7 @@ ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (right 
outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -149,7 +150,7 @@ ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -161,7 +162,7 @@ ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102); -- Keys plus residual (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -175,7 +176,7 @@ ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -189,7 +190,7 @@ ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -201,7 +202,7 @@ ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value @@ -213,7 +214,7 @@ ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102); -- Keys plus residual (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value diff --git ql/src/test/queries/clientpositive/vectorized_join46_mr.q ql/src/test/queries/clientpositive/vectorized_join46_mr.q new file mode 100644 index 0000000..7be2b0e --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_join46_mr.q @@ -0,0 +1,228 @@ +set hive.cli.print.header=true; +set hive.vectorized.execution.enabled=true; +set hive.auto.convert.join=true; +set hive.strict.checks.cartesian.product=false; +set hive.join.emit.interval=2; + +-- SORT_QUERY_RESULTS + +CREATE TABLE test1 (key INT, value INT, col_1 STRING); +INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), + (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); + +CREATE TABLE test2 (key INT, value INT, col_2 STRING); +INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), + (104, 3, 'Fli'), (105, NULL, 'None'); + + +-- Basic outer join +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value); + +-- Conjunction with pred on multiple inputs and single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102); + +-- Conjunction with pred on single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON 
(test1.key between 100 and 102 + AND test2.key between 100 and 102); + +-- Conjunction with pred on multiple inputs and none (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true); + +-- Condition on one input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +-- Disjunction with pred on multiple inputs and single inputs (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +-- Disjunction with pred on multiple inputs and single inputs (full
outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out index c2bf2e5..92b2fd8 100644 --- ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -893,7 +893,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) diff --git ql/src/test/results/clientpositive/auto_join18.q.out ql/src/test/results/clientpositive/auto_join18.q.out index 7e8de94..5851e2d 100644 --- ql/src/test/results/clientpositive/auto_join18.q.out +++ ql/src/test/results/clientpositive/auto_join18.q.out @@ -89,7 +89,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out index 9c0bffd..b692193 100644 --- ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out @@ -91,7 +91,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/auto_join6.q.out ql/src/test/results/clientpositive/auto_join6.q.out index a3e829f..578906e 100644 --- ql/src/test/results/clientpositive/auto_join6.q.out +++ ql/src/test/results/clientpositive/auto_join6.q.out @@ -83,7 +83,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/auto_join7.q.out ql/src/test/results/clientpositive/auto_join7.q.out index 1f2616e..a094d27 100644 --- ql/src/test/results/clientpositive/auto_join7.q.out +++ ql/src/test/results/clientpositive/auto_join7.q.out @@ -110,7 +110,7 @@ STAGE PLANS: Reduce 
Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/beeline/mapjoin2.q.out ql/src/test/results/clientpositive/beeline/mapjoin2.q.out index 6b85e13..2288b4b 100644 --- ql/src/test/results/clientpositive/beeline/mapjoin2.q.out +++ ql/src/test/results/clientpositive/beeline/mapjoin2.q.out @@ -27,6 +27,85 @@ POSTHOOK: Output: default@tbl_n1 POSTHOOK: Lineage: tbl_n1.n SCRIPT [] POSTHOOK: Lineage: tbl_n1.t SCRIPT [] Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:tbl_n1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:tbl_n1 + TableScan + alias: tbl_n1 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: n (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 1L) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (n = 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1L (type: bigint), _col0 (type: string), _col1 is null (type: boolean), _col2 is null (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -37,6 +116,91 @@ POSTHOOK: Input: default@tbl_n1 #### A masked pattern was here #### 1 one true true Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:tbl_n1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:tbl_n1 + TableScan + alias: tbl_n1 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: n (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 2L) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (n = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 is null (type: boolean), _col1 is null (type: boolean), 2L (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -47,6 +211,81 @@ POSTHOOK: Input: default@tbl_n1 #### A masked pattern was here #### true true 2 two Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 1L) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (n = 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1L (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: string) + TableScan + alias: tbl_n1 + filterExpr: (n = 2L) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (n = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 2L (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 + 1 {false} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 is null (type: boolean), _col1 is null (type: boolean), _col2 is null (type: boolean), _col3 is null (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join 
(select * from tbl_n1 where n = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -58,6 +297,74 @@ POSTHOOK: Input: default@tbl_n1 false false true true true true false false Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:_dummy_table + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), 1 (type: int), 0 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -68,6 +375,77 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:_dummy_table + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), _col1 (type: int), 0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -78,6 +456,83 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:_dummy_table + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col2 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -87,6 +542,83 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 +Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:_dummy_table + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col2 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out index 733a3f5..122b136 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out @@ -274,7 +274,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -496,7 +496,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge 
Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out index e39edcc..480b12e 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out @@ -232,7 +232,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -458,7 +458,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out index fa7968d..ba0a2ce 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out @@ -231,7 +231,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -455,7 +455,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out index 4b1313d..16137dd 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out @@ -633,7 +633,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/cbo_rp_join1.q.out ql/src/test/results/clientpositive/cbo_rp_join1.q.out index c5ec00d..38cb34d 100644 --- ql/src/test/results/clientpositive/cbo_rp_join1.q.out +++ ql/src/test/results/clientpositive/cbo_rp_join1.q.out @@ -53,7 +53,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 = 40)} 1 {(VALUE._col0 = 40)} @@ -156,7 +156,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 = 40)} {(VALUE._col1 = 40)} 1 {(VALUE._col0 = 40)} @@ -259,7 +259,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 = 40)} 1 {(VALUE._col0 = 40)} @@ -362,7 +362,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} 1 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} diff --git ql/src/test/results/clientpositive/correlationoptimizer8.q.out 
ql/src/test/results/clientpositive/correlationoptimizer8.q.out index 102cc01..c403941 100644 --- ql/src/test/results/clientpositive/correlationoptimizer8.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -1102,7 +1102,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) diff --git ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out new file mode 100644 index 0000000..0ae9df9 --- /dev/null +++ ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out @@ -0,0 +1,176 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 
(type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: s + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 
3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 diff --git ql/src/test/results/clientpositive/infer_join_preds.q.out ql/src/test/results/clientpositive/infer_join_preds.q.out index 57f6511..eefc68b 100644 --- ql/src/test/results/clientpositive/infer_join_preds.q.out +++ ql/src/test/results/clientpositive/infer_join_preds.q.out @@ -460,7 +460,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join18.q.out ql/src/test/results/clientpositive/join18.q.out index 9d82aa0..98a4f5f 100644 --- ql/src/test/results/clientpositive/join18.q.out +++ ql/src/test/results/clientpositive/join18.q.out @@ -88,7 +88,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join18_multi_distinct.q.out ql/src/test/results/clientpositive/join18_multi_distinct.q.out index a95e9b1..1c5ad14 100644 --- ql/src/test/results/clientpositive/join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/join18_multi_distinct.q.out @@ -90,7 +90,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join45.q.out ql/src/test/results/clientpositive/join45.q.out index cbabf7f..77dbaa2 100644 --- ql/src/test/results/clientpositive/join45.q.out +++ ql/src/test/results/clientpositive/join45.q.out @@ -1365,7 +1365,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1470,7 +1470,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join46.q.out ql/src/test/results/clientpositive/join46.q.out index c192194..ea8b71c 100644 --- ql/src/test/results/clientpositive/join46.q.out +++ ql/src/test/results/clientpositive/join46.q.out @@ -1423,7 +1423,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1522,7 +1522,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1619,7 +1619,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1718,7 +1718,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -1876,7 +1876,7 
@@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/join47.q.out ql/src/test/results/clientpositive/join47.q.out index 6f529d6..2536f7f 100644 --- ql/src/test/results/clientpositive/join47.q.out +++ ql/src/test/results/clientpositive/join47.q.out @@ -1347,7 +1347,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1452,7 +1452,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join6.q.out ql/src/test/results/clientpositive/join6.q.out index 055eee2..1135cef 100644 --- ql/src/test/results/clientpositive/join6.q.out +++ ql/src/test/results/clientpositive/join6.q.out @@ -83,7 +83,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join7.q.out ql/src/test/results/clientpositive/join7.q.out index 0339c90..0bc6f89 100644 --- ql/src/test/results/clientpositive/join7.q.out +++ ql/src/test/results/clientpositive/join7.q.out @@ -110,7 +110,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join_filters_overlap.q.out ql/src/test/results/clientpositive/join_filters_overlap.q.out index 6557cac..ffb8cd3 100644 --- ql/src/test/results/clientpositive/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -780,7 +780,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 Left Outer Join 0 to 3 filter mappings: diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 4cfb1d9..910c379 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1285,11 +1285,11 @@ STAGE PLANS: selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 6:string Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 6, 2] + partitionColumns: 5:int + valueColumns: 0:string, 6:string, 2:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1492,11 +1492,11 @@ STAGE PLANS: projectedOutputColumnNums: [4, 2, 3] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: 
[5] - valueColumnNums: [2, 3] + partitionColumns: 5:int + valueColumns: 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1653,11 +1653,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1683,11 +1682,11 @@ STAGE PLANS: vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 3, 0, 1] + keyColumns: 2:string, 3:string, 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2, 3, 0, 1] - valueColumnNums: [4] + partitionColumns: 2:string, 3:string, 0:string, 1:string + valueColumns: 4:struct Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1732,11 +1731,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Select Vectorization: className: VectorSelectOperator native: true @@ -1766,6 +1764,9 @@ STAGE PLANS: className: VectorAppMasterEventOperator native: true Reducer 3 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -2077,11 +2078,11 @@ STAGE PLANS: selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 6:string Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 6, 2] + partitionColumns: 5:int + valueColumns: 0:string, 6:string, 2:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2284,11 +2285,11 @@ STAGE PLANS: projectedOutputColumnNums: [4, 2, 3] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [2, 3] + partitionColumns: 5:int + valueColumns: 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2446,11 +2447,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2476,11 +2476,11 @@ STAGE PLANS: vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 3, 0, 1] + keyColumns: 2:string, 3:string, 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2, 3, 0, 1] - valueColumnNums: [4] + partitionColumns: 2:string, 3:string, 0:string, 1:string + valueColumns: 4:struct Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2525,11 +2525,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Select Vectorization: className: VectorSelectOperator native: true @@ -2559,6 +2558,9 @@ STAGE PLANS: className: VectorAppMasterEventOperator native: true Reducer 3 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/auto_join_filters.q.out ql/src/test/results/clientpositive/llap/auto_join_filters.q.out index a639792..7290fac 100644 --- ql/src/test/results/clientpositive/llap/auto_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/auto_join_filters.q.out @@ -54,6 +54,113 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n5 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND 
a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} + 1 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 @@ -198,6 +305,142 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n5 #### A masked pattern was here #### 4939870 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(KEY.reducesinkkey0 > 40)} {(VALUE._col0 > 50)} {(KEY.reducesinkkey0 = VALUE._col0)} + 1 {(VALUE._col0 > 40)} {(KEY.reducesinkkey0 > 50)} {(VALUE._col0 = KEY.reducesinkkey0)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 
Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: 
default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 @@ -340,6 +583,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n5 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 @@ -484,6 +737,142 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n5 #### A masked pattern was here #### 4939870 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + 
Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(KEY.reducesinkkey0 > 40)} {(VALUE._col0 > 50)} {(KEY.reducesinkkey0 = VALUE._col0)} + 1 {(VALUE._col0 > 40)} {(KEY.reducesinkkey0 > 50)} {(VALUE._col0 = KEY.reducesinkkey0)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND 
b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 diff --git ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out index 194fc5d..a160428 100644 --- ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out @@ -188,6 +188,139 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n2 #### A masked pattern was here #### 4543526 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n2 
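
The auto_join_nulls cases above reduce each FULL OUTER JOIN to a single checksum with sum(hash(...)): a regression in null handling or in either non-matched side of the join shows up as a changed scalar (4543526 below) instead of a large result diff. In the myinput1_n5 plans further up, the extra single-table comparisons (a.key > 40, b.value > 50, and so on) surface as filter predicates on the Merge Join Operator rather than as join keys, which is what lets rows failing them still appear as non-matched FULL OUTER rows. A minimal sketch of the same checksum idiom, assuming a scratch table t(key INT, value INT) that is not part of this patch:

    CREATE TABLE t (key INT, value INT);
    INSERT INTO t VALUES (40, 50), (NULL, 60);
    -- NULL join keys never compare equal, so the NULL-keyed row reaches the
    -- output only through the non-matched side of the FULL OUTER JOIN.
    SELECT sum(hash(a.key, a.value, b.key, b.value))
    FROM t a FULL OUTER JOIN t b ON a.key = b.key;

Dropping either non-matched side would change the checksum, which is exactly what these golden values pin down.
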
+#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value and a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value and a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +4543526 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n2 a LEFT OUTER JOIN myinput1_n2 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1_n2 c ON (b.value=c.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n2 diff --git ql/src/test/results/clientpositive/llap/convert_decimal64_to_decimal.q.out ql/src/test/results/clientpositive/llap/convert_decimal64_to_decimal.q.out index fddd2cb..3d0053c 100644 --- ql/src/test/results/clientpositive/llap/convert_decimal64_to_decimal.q.out +++ ql/src/test/results/clientpositive/llap/convert_decimal64_to_decimal.q.out @@ -207,10 +207,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -260,10 +259,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(9,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [19] + keyColumns: 19:decimal(9,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [14] + valueColumns: 14:smallint Statistics: Num rows: 950 Data size: 104800 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint) Execution mode: vectorized, llap @@ -417,10 +416,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -471,10 +469,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(9,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [19] + keyColumns: 19:decimal(9,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [14] + 
valueColumns: 14:smallint Statistics: Num rows: 950 Data size: 104800 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out index 0edeef9..6316323 100644 --- ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out +++ ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out @@ -2213,7 +2213,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -2355,7 +2355,291 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 525 Data size: 3612 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +652447 510 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 525 Data size: 3612 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +652447 510 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join 
Operator + condition map: + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out index b075ecf..801948c 100644 --- ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out +++ ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out @@ -983,7 +983,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1153,7 +1153,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1229,6 +1229,910 @@ POSTHOOK: Input: default@src1 #### A masked pattern was here #### 12744278 500 652447 25 PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) 
b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 500 652447 25 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: 
vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 2457 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM 
(SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 500 652447 25 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE 
Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 262 Data size: 1131 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y 
group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 310 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 262 Data size: 1131 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 310 +PREHOOK: 
query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 262 Data size: 1131 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 310 +PREHOOK: query: EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT a.key AS key, count(1) AS cnt FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a @@ -1320,7 +2224,7 
@@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1352,42 +2256,207 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col0), sum(_col1) + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 310 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b 
+ ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort 
order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 5 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -1395,42 +2464,38 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt - FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a - FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 #### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt - FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a - FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 
z group by z.key) b + ON (a.key = b.key)) tmp POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -12744278 310 +1711763 3531902962 1711763 37 PREHOOK: query: EXPLAIN -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt - FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a - FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt - FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a - FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1441,141 +2506,136 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Group By Operator + aggregations: count(value) keys: key (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: y - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - mode: hash + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 262 Data size: 1131 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: 
mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE Column stats: COMPLETE + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col0), sum(_col1) + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 5 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -1583,27 +2643,25 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt - FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a - FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - 
GROUP BY a.key) tmp +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 #### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt - FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a - FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -12744278 310 +1711763 3531902962 1711763 37 PREHOOK: query: EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -1790,9 +2848,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1828,7 +2887,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: y @@ -1854,17 +2913,32 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 7532 Basic stats: 
COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash @@ -1874,7 +2948,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1889,7 +2963,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out index f03e3d1..2aab36e 100644 --- ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out +++ ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out @@ -220,18 +220,22 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 13 10 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x JOIN T1_n146 y ON (x.key = y.key) JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x JOIN T1_n146 y ON (x.key = y.key) JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -252,60 +256,153 @@ STAGE PLANS: alias: x filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + 
featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 6 Map Operator Tree: TableScan alias: z filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: 
NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -331,11 +428,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -343,26 +464,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + 
dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -394,18 +555,22 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 13 10 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x JOIN T1_n146 y ON (x.key = y.key) JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x JOIN T1_n146 y ON (x.key = y.key) JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -426,12 +591,23 @@ STAGE PLANS: alias: x filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -441,6 +617,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col1 input vertices: 1 Map 4 @@ -448,6 +630,14 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + 
projectedOutputColumnNums: [0] keys: _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -456,55 +646,159 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: z filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: 
SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -512,26 +806,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -563,18 +897,22 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 13 10 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY x.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -594,49 +932,130 @@ STAGE PLANS: TableScan alias: x Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: 
true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: y Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 6 Map Operator Tree: TableScan alias: z Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + 
partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -662,11 +1081,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -674,26 +1117,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: 
[0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -725,18 +1208,22 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 22 12 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY x.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY x.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -756,49 +1243,130 @@ STAGE PLANS: TableScan alias: x Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: y Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink 
Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 6 Map Operator Tree: TableScan alias: z Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -824,11 +1392,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator 
+ groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -836,26 +1428,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -887,18 +1519,22 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 22 12 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z 
ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -918,49 +1554,130 @@ STAGE PLANS: TableScan alias: x Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: y Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 6 Map Operator Tree: TableScan alias: z Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan 
Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -986,11 +1703,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -998,26 +1739,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: 
NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1049,18 +1830,22 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 13 12 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT z.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY z.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT z.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY z.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1080,49 +1865,130 @@ STAGE PLANS: TableScan alias: x Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: y Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 6 Map Operator Tree: TableScan alias: z Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1148,11 +2014,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -1160,26 +2050,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + 
scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1211,18 +2141,22 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 21 12 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT z.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY z.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT z.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY z.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1242,49 +2176,130 @@ STAGE PLANS: TableScan alias: x Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: y Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select 
Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 6 Map Operator Tree: TableScan alias: z Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1310,11 +2325,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + 
usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -1322,26 +2361,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1373,18 +2452,22 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 21 12 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM 
T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1404,49 +2487,130 @@ STAGE PLANS: TableScan alias: x Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: y Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 6 Map Operator Tree: TableScan alias: z Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1472,11 +2636,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -1484,26 +2672,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: 
sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1535,18 +2763,22 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 21 12 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1566,56 +2798,137 @@ STAGE PLANS: TableScan alias: x Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: 
Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: y Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 6 Map Operator Tree: TableScan alias: z Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1634,11 +2947,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -1646,26 +2983,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: 
_col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1697,18 +3074,644 @@ POSTHOOK: Input: default@t2_n86 POSTHOOK: Input: default@t3_n34 #### A masked pattern was here #### 21 14 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic 
stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 6 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 
+ Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n146 +PREHOOK: Input: default@t2_n86 +PREHOOK: Input: default@t3_n34 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n146 +POSTHOOK: Input: default@t2_n86 +POSTHOOK: Input: default@t3_n34 +#### A masked pattern was here #### +21 14 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 
2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 6 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + 
native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n146 +PREHOOK: Input: default@t2_n86 +PREHOOK: Input: default@t3_n34 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n146 +POSTHOOK: Input: default@t2_n86 +POSTHOOK: Input: default@t3_n34 +#### A masked pattern was here #### +21 14 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = 
z.key) GROUP BY y.key) tmp POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1728,56 +3731,137 @@ STAGE PLANS: TableScan alias: x Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: y Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 6 Map Operator Tree: TableScan alias: z Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + 
native: true + vectorizationSchemaColumns: [0:key:int, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1796,11 +3880,35 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 @@ -1808,26 +3916,66 @@ STAGE PLANS: Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] + selectExpressions: VectorUDFAdaptor(hash(_col0)) -> 2:int, VectorUDFAdaptor(hash(_col1)) -> 3:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: 
HASH + projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/cross_prod_1.q.out ql/src/test/results/clientpositive/llap/cross_prod_1.q.out index df525c3..bdc9323 100644 --- ql/src/test/results/clientpositive/llap/cross_prod_1.q.out +++ ql/src/test/results/clientpositive/llap/cross_prod_1.q.out @@ -1973,7 +1973,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/llap/explainuser_4.q.out ql/src/test/results/clientpositive/llap/explainuser_4.q.out index c650698..aa65d4f 100644 --- ql/src/test/results/clientpositive/llap/explainuser_4.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_4.q.out @@ -270,7 +270,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_36] Map Join Operator [MAPJOIN_35] (rows=1501 width=236) - Conds:RS_31.KEY.reducesinkkey0=RS_34.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] + Conds:RS_31.KEY.reducesinkkey0=RS_34.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 4 [CUSTOM_SIMPLE_EDGE] vectorized, llap PARTITION_ONLY_SHUFFLE [RS_34] PartitionCols:_col2 @@ 
-351,7 +351,7 @@ Stage-0 Group By Operator [GBY_38] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Map Join Operator [MAPJOIN_37] (rows=1501 width=236) - Conds:RS_33.KEY.reducesinkkey0=RS_36.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true + Conds:RS_33.KEY.reducesinkkey0=RS_36.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true <-Map 4 [CUSTOM_SIMPLE_EDGE] vectorized, llap PARTITION_ONLY_SHUFFLE [RS_36] PartitionCols:_col0 @@ -431,7 +431,7 @@ Stage-0 Group By Operator [GBY_40] (rows=1501 width=236) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Map Join Operator [MAPJOIN_39] (rows=1501 width=236) - Conds:RS_35.KEY.reducesinkkey0=RS_38.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0"] + Conds:RS_35.KEY.reducesinkkey0=RS_38.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0"] <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized, llap PARTITION_ONLY_SHUFFLE [RS_38] PartitionCols:_col0 diff --git ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 0000000..c387af5 --- /dev/null +++ ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,3139 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date 
date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked 
pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 
+NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 
7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN 
fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL 
-2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator 
+ expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 
2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c 
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + 
TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 
+NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM 
fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 
+-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: 
Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull 
+PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 
NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 
-1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 
NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 
NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: 
LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: 
fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b 
s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:25.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:36.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:29.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 
-0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL 
NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE 
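
(An editorial aside on the multikey golden output above: every case in this file follows the same staging recipe — delimited text table, ORC CTAS, ANALYZE for basic and column statistics, EXPLAIN VECTORIZATION DETAIL, then the query itself. The sketch below restates that recipe as a hypothetical q-file; the SET lines are assumptions inferred from the printed plan, which reports vectorization disabled and DynamicPartitionHashJoin true, and are not statements appearing in this diff.)

-- Hypothetical q-file sketch of the multikey FULL OUTER MapJoin case above.
-- The SET values are inferred from the plan output, not copied from the diff.
set hive.vectorized.execution.enabled=false;        -- plan prints "enabled: false"
set hive.optimize.dynamic.partition.hashjoin=true;  -- plan prints "DynamicPartitionHashJoin: true"

EXPLAIN VECTORIZATION DETAIL
SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal
FROM fullouter_multikey_big_1b b
FULL OUTER JOIN fullouter_multikey_small_1b s
  ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
ORDER BY b.key0, b.key1;
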
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, 
s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: 
Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table 
fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL 
NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A 
masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL 
GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/join46.q.out ql/src/test/results/clientpositive/llap/join46.q.out index 07c4a62..95d3611 100644 --- ql/src/test/results/clientpositive/llap/join46.q.out +++ ql/src/test/results/clientpositive/llap/join46.q.out @@ -1633,7 +1633,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1746,7 +1746,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1857,7 +1857,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1970,7 +1970,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -2148,7 +2148,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out index c0c9f95..c3b1eb7 100644 --- ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out +++ ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out @@ -230,6 +230,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -318,6 +319,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -407,6 +409,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -495,6 +498,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/llap/llap_acid.q.out ql/src/test/results/clientpositive/llap/llap_acid.q.out index 635f928..3058303 100644 --- 
ql/src/test/results/clientpositive/llap/llap_acid.q.out +++ ql/src/test/results/clientpositive/llap/llap_acid.q.out @@ -115,10 +115,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -259,11 +259,11 @@ STAGE PLANS: projectedOutputColumnNums: [5, 2, 3, 4] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [6] - valueColumnNums: [2, 3, 4] + partitionColumns: 6:int + valueColumns: 2:float, 3:double, 4:smallint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -369,10 +369,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out index c4dc6f7..50a2ee2 100644 --- ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out +++ ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out @@ -109,10 +109,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -253,11 +253,11 @@ STAGE PLANS: projectedOutputColumnNums: [5, 2, 3, 4] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [6] - valueColumnNums: [2, 3, 4] + partitionColumns: 6:int + valueColumns: 2:float, 3:double, 4:smallint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 
Vectorization: @@ -363,10 +363,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_partitioned.q.out ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index e6fa1ac..f078ecc 100644 --- ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1645,13 +1645,15 @@ STAGE PLANS: 0 ctinyint (type: tinyint) 1 ctinyint (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [10] - bigTableRetainedColumnNums: [1, 6, 7, 10] - bigTableValueColumnNums: [1, 6, 7, 10] + bigTableKeyColumns: 10:tinyint + bigTableRetainColumnNums: [1, 6, 7, 10] + bigTableValueColumns: 1:int, 6:char(255), 7:varchar(255), 10:tinyint className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1, 6, 7, 10] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 1:int, 6:char(255), 7:varchar(255), 10:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col6, _col7, _col10 input vertices: 1 Map 2 @@ -1706,10 +1708,9 @@ STAGE PLANS: Map-reduce partition columns: ctinyint (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [10] + keyColumns: 10:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint) @@ -2115,10 +2116,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 694 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index b16b8d0..1e2b330 100644 --- ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -64,6 +64,7 @@ STAGE PLANS: className: 
VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + hashTableImplementationType: FAST input vertices: 1 Map 3 Statistics: Num rows: 24737 Data size: 197896 Basic stats: COMPLETE Column stats: COMPLETE @@ -245,6 +246,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + hashTableImplementationType: FAST input vertices: 1 Map 3 Statistics: Num rows: 24737 Data size: 197896 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/mapjoin2.q.out ql/src/test/results/clientpositive/llap/mapjoin2.q.out index 872f918..c0d20b4 100644 --- ql/src/test/results/clientpositive/llap/mapjoin2.q.out +++ ql/src/test/results/clientpositive/llap/mapjoin2.q.out @@ -27,6 +27,86 @@ POSTHOOK: Output: default@tbl_n1 POSTHOOK: Lineage: tbl_n1.n SCRIPT [] POSTHOOK: Lineage: tbl_n1.t SCRIPT [] Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 1L) (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (n = 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: t (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 2 + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1L (type: bigint), _col0 (type: string), _col1 is null (type: boolean), _col2 is null (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no 
inputs + Map 2 + Map Operator Tree: + TableScan + alias: tbl_n1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: n (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -37,6 +117,89 @@ POSTHOOK: Input: default@tbl_n1 #### A masked pattern was here #### 1 one true true Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 2' is a cross product +PREHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tbl_n1 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: n (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 2L) (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (n = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: t (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 is null (type: boolean), _col1 is null (type: boolean), 2L (type: bigint), _col2 (type: string) + outputColumnNames: 
_col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -47,6 +210,95 @@ POSTHOOK: Input: default@tbl_n1 #### A masked pattern was here #### true true 2 two Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 1L) (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (n = 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1L (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 2L) (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (n = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 2L (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 + 1 {false} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + 
Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 is null (type: boolean), _col1 is null (type: boolean), _col2 is null (type: boolean), _col3 is null (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -58,6 +310,75 @@ POSTHOOK: Input: default@tbl_n1 false false true true true true false false Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + input vertices: + 1 Map 2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), 1 (type: int), 0 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs 
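The two result rows in the full outer join output just above are worth reading closely: with tbl_n1 holding (1, 'one') and (2, 'two'), the disjoint filters leave each side with a single row that matches nothing, so each output row comes back null-extended on the opposite side. A minimal sketch of that behavior, assuming the tbl_n1(n bigint, t string) table created earlier in mapjoin2.q:

-- Sketch: each single-row side matches nothing, so the FULL OUTER JOIN
-- null-extends the unmatched side of every row.
SELECT isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t)
FROM (SELECT * FROM tbl_n1 WHERE n = 1) a
FULL OUTER JOIN (SELECT * FROM tbl_n1 WHERE n = 2) b
  ON a.n = b.n;
-- Expected, per the golden output above:
--   false false true  true    (the a-side row, b null-extended)
--   true  true  false false   (the b-side row, a null-extended)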
+ + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -68,6 +389,79 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), _col1 (type: int), 0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -78,6 +472,82 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 2' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col2 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -87,6 +557,82 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 2' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col2 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY diff --git 
ql/src/test/results/clientpositive/llap/mapjoin46.q.out ql/src/test/results/clientpositive/llap/mapjoin46.q.out index 52eb609..d0d9c87 100644 --- ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -128,14 +128,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -239,12 +239,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -344,12 +344,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob 102 2 Del +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob 102 2 Del -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -438,10 +438,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product @@ -535,18 +535,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat NULL NULL NULL 100 1 Bob 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli 100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del -101 2 Car 105 NULL None -101 2 Car 104 3 Fli 101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -644,19 +644,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli 100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del -101 2 Car 105 NULL None -101 2 Car 104 3 Fli 101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -750,19 +750,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli 100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del -101 2 Car 105 NULL None -101 2 Car 104 3 Fli 101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -856,14 +856,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob 102 2 Del -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -961,13 +961,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1065,19 +1065,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 102 2 Del -99 0 Alice 102 2 Del -98 NULL None 102 2 Del -101 2 Car 103 2 Ema 100 1 Bob 103 2 Ema -99 2 Mat 103 2 Ema -101 2 Car 104 3 Fli 100 1 Bob 104 3 Fli -101 2 Car 105 NULL None 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1171,16 +1171,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 102 2 Del -101 2 Car 103 2 Ema 100 1 Bob 103 2 Ema -99 2 Mat 103 2 Ema -101 2 Car 104 3 Fli 100 1 Bob 104 3 Fli -101 2 Car 105 NULL None 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1274,16 +1274,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 102 2 Del -99 0 Alice 102 2 Del -98 NULL None 102 2 Del +101 2 Car 102 2 Del 101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -1381,9 +1381,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product @@ -1448,7 +1448,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1488,11 +1488,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1501,18 +1496,25 @@ NULL NULL None 102 2 Del 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1561,12 +1563,12 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -1586,7 +1588,8 @@ Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduce PREHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1_n4 PREHOOK: Input: default@test2_n2 @@ -1594,16 +1597,12 @@ PREHOOK: Input: default@test2_n2 POSTHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) POSTHOOK: 
type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1612,18 +1611,23 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1672,12 +1676,12 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -1697,7 +1701,7 @@ Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduce PREHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1_n4 PREHOOK: Input: default@test2_n2 @@ -1705,34 +1709,36 @@ PREHOOK: Input: default@test2_n2 POSTHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del 101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli -NULL NULL NULL 105 NULL None +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1743,7 +1749,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 
(SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1756,11 +1762,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1773,11 +1777,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1785,16 +1787,16 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 10 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1806,11 +1808,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1_n4 PREHOOK: Input: default@test2_n2 @@ -1818,29 +1820,474 @@ PREHOOK: Input: default@test2_n2 POSTHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL NULL 105 NULL None -NULL NULL None NULL NULL NULL +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL -100 1 Bob NULL NULL NULL 99 2 Mat 102 2 Del -101 2 Car 102 2 Del -101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli -Warning: Shuffle Join 
MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * -FROM ( - SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, - test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 - FROM test1_n4 RIGHT OUTER JOIN test2_n2 - ON (test1_n4.value=test2_n2.value +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER 
JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern 
was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 10 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR 
test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 10 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator 
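The plan shapes in this file differ only in where the disjunction sits in the ON clause, and that placement decides the join strategy. A side-by-side sketch of the two cases exercised here, assuming the test1_n4/test2_n2 tables created earlier in mapjoin46.q:

-- 1) Equality ANDed with extra predicates: the equality remains the
--    shuffle key (SIMPLE_EDGE on value, keys _col1) and the BETWEEN
--    disjunction is evaluated afterwards as a residual filter predicate.
SELECT *
FROM test1_n4 FULL OUTER JOIN test2_n2
ON (test1_n4.value = test2_n2.value
    AND (test1_n4.key BETWEEN 100 AND 102
         OR test2_n2.key BETWEEN 100 AND 102));

-- 2) Disjunction at the top level of the ON clause: no usable equi-key
--    survives, so the plan degrades to a keyless cross product
--    (CUSTOM_SIMPLE_EDGE, empty keys) and the entire condition becomes
--    the residual filter predicate, which is why these queries carry the
--    cross-product warning.
SELECT *
FROM test1_n4 FULL OUTER JOIN test2_n2
ON (test1_n4.value = test2_n2.value
    OR test2_n2.key BETWEEN 100 AND 102);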
+ limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value AND (test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102)) ) sq1 @@ -1972,7 +2419,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -2038,23 +2485,248 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL 101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL 101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL -NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL -NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL -NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL -NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema -NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR 
test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 4 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Map 
Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 8 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 8 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} + Statistics: Num rows: 64 Data size: 23107 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 64 Data size: 23107 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key 
between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/mergejoin.q.out ql/src/test/results/clientpositive/llap/mergejoin.q.out index 1e4f632..b86d822 100644 --- ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -52,10 +52,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:string Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -105,10 +105,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:string Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Select Operator @@ -135,10 +135,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap @@ -176,6 +175,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -208,10 +210,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -444,6 +445,9 @@ STAGE PLANS: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1535,6 +1539,9 @@ STAGE PLANS: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1655,6 +1662,9 @@ STAGE PLANS: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1764,10 +1774,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1809,10 +1818,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1837,7 +1845,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1851,6 +1859,9 @@ STAGE PLANS: sort order: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1971,10 +1982,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -2025,10 +2036,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) @@ -2054,10 +2064,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap @@ -2107,10 +2116,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2159,6 +2167,9 @@ STAGE PLANS: sort order: Statistics: 
Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2178,6 +2189,9 @@ STAGE PLANS: sort order: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -2248,10 +2262,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:int, 1:int, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 @@ -2286,10 +2299,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -2369,10 +2381,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2422,10 +2433,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2464,6 +2474,9 @@ STAGE PLANS: sort order: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: 
false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2594,6 +2607,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Map 6 Map Operator Tree: TableScan @@ -2624,10 +2640,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2677,10 +2692,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2719,6 +2733,9 @@ STAGE PLANS: sort order: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -2819,10 +2836,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2872,10 +2888,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2914,6 +2929,9 @@ STAGE PLANS: sort order: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE 
Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3032,10 +3050,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -3086,10 +3104,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) @@ -3115,10 +3132,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap @@ -3168,10 +3184,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3220,6 +3235,9 @@ STAGE PLANS: sort order: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3239,6 +3257,9 @@ STAGE PLANS: sort order: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + 
enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -3309,10 +3330,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:int, 1:int, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 @@ -3347,10 +3367,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -3447,6 +3466,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Map 6 Map Operator Tree: TableScan @@ -3477,10 +3499,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3530,10 +3551,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3572,6 +3592,9 @@ STAGE PLANS: sort order: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -3683,10 +3706,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3735,10 +3757,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3786,6 +3807,9 @@ STAGE PLANS: sort order: Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out index 54ccf58..d94b5e7 100644 --- ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out +++ ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -271,8 +271,8 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_GBY_8: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_7: 2100 - RECORDS_OUT_OPERATOR_SEL_6: 2100 - RECORDS_OUT_OPERATOR_TS_0: 2100 + RECORDS_OUT_OPERATOR_SEL_6: 3 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 262144 ALLOCATED_USED_BYTES: 26 @@ -327,13 +327,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 8 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 8 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 8 - RECORDS_OUT_OPERATOR_SEL_9: 8 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 1048576 ALLOCATED_USED_BYTES: 2732 @@ -367,13 +367,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 22 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 22 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 22 - RECORDS_OUT_OPERATOR_SEL_9: 22 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -405,13 +405,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 16 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 16 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 16 - 
RECORDS_OUT_OPERATOR_SEL_9: 16 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -443,13 +443,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 18 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 18 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 18 - RECORDS_OUT_OPERATOR_SEL_9: 18 - RECORDS_OUT_OPERATOR_TS_0: 2000 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -487,7 +487,7 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1 RECORDS_OUT_OPERATOR_SEL_9: 1 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -519,13 +519,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 32 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 32 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 32 - RECORDS_OUT_OPERATOR_SEL_9: 32 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -557,13 +557,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 32 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 32 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 32 - RECORDS_OUT_OPERATOR_SEL_9: 32 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -595,13 +595,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 1697 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 1697 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1697 - RECORDS_OUT_OPERATOR_SEL_9: 1697 - RECORDS_OUT_OPERATOR_TS_0: 2000 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -633,13 +633,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 12 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 12 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 12 - RECORDS_OUT_OPERATOR_SEL_9: 12 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -671,13 +671,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 1713 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 1713 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1713 - RECORDS_OUT_OPERATOR_SEL_9: 1713 - RECORDS_OUT_OPERATOR_TS_0: 2000 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO 
COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -709,13 +709,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 6 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 6 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 6 - RECORDS_OUT_OPERATOR_SEL_9: 6 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -747,13 +747,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 50 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 50 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 50 - RECORDS_OUT_OPERATOR_SEL_9: 50 - RECORDS_OUT_OPERATOR_TS_0: 1100 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -785,13 +785,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 318 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 318 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 318 - RECORDS_OUT_OPERATOR_SEL_9: 318 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 @@ -959,13 +959,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2 - RECORDS_OUT_OPERATOR_SEL_9: 2 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 1310720 ALLOCATED_USED_BYTES: 13810 @@ -999,13 +999,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 6 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 6 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 6 - RECORDS_OUT_OPERATOR_SEL_9: 6 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1037,13 +1037,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 6 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 6 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 6 - RECORDS_OUT_OPERATOR_SEL_9: 6 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1097,13 +1097,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2100 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2100 + RECORDS_OUT_OPERATOR_FIL_8: 3 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2100 - RECORDS_OUT_OPERATOR_SEL_9: 2100 - 
RECORDS_OUT_OPERATOR_TS_0: 2100 + RECORDS_OUT_OPERATOR_SEL_9: 3 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1170,13 +1170,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2 - RECORDS_OUT_OPERATOR_SEL_9: 2 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1208,13 +1208,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2 - RECORDS_OUT_OPERATOR_SEL_9: 2 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1246,13 +1246,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2 - RECORDS_OUT_OPERATOR_SEL_9: 2 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1284,13 +1284,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 81 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 81 + RECORDS_OUT_OPERATOR_FIL_8: 3 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 81 - RECORDS_OUT_OPERATOR_SEL_9: 81 - RECORDS_OUT_OPERATOR_TS_0: 2100 + RECORDS_OUT_OPERATOR_SEL_9: 3 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1322,13 +1322,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 74 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 74 + RECORDS_OUT_OPERATOR_FIL_8: 3 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 74 - RECORDS_OUT_OPERATOR_SEL_9: 74 - RECORDS_OUT_OPERATOR_TS_0: 2100 + RECORDS_OUT_OPERATOR_SEL_9: 3 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1360,13 +1360,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 12 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 12 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 12 - RECORDS_OUT_OPERATOR_SEL_9: 12 - RECORDS_OUT_OPERATOR_TS_0: 2000 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1398,13 +1398,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 13 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 13 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 
RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 13 - RECORDS_OUT_OPERATOR_SEL_9: 13 - RECORDS_OUT_OPERATOR_TS_0: 2000 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1442,7 +1442,7 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1 RECORDS_OUT_OPERATOR_SEL_9: 1 - RECORDS_OUT_OPERATOR_TS_0: 100 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1474,13 +1474,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 7 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 7 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 7 - RECORDS_OUT_OPERATOR_SEL_9: 7 - RECORDS_OUT_OPERATOR_TS_0: 1100 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1617,13 +1617,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2 - RECORDS_OUT_OPERATOR_SEL_9: 2 - RECORDS_OUT_OPERATOR_TS_0: 100 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1655,13 +1655,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 6 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 6 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 6 - RECORDS_OUT_OPERATOR_SEL_9: 6 - RECORDS_OUT_OPERATOR_TS_0: 1100 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 5935 CACHE_MISS_BYTES: 0 @@ -1693,13 +1693,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2 - RECORDS_OUT_OPERATOR_SEL_9: 2 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 6990 CACHE_MISS_BYTES: 0 @@ -1731,13 +1731,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2 - RECORDS_OUT_OPERATOR_SEL_9: 2 - RECORDS_OUT_OPERATOR_TS_0: 100 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 6990 CACHE_MISS_BYTES: 0 diff --git ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out index 93e2667..511e722 100644 --- ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out +++ ql/src/test/results/clientpositive/llap/orc_llap_counters1.q.out @@ -267,13 +267,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2094 
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2094 + RECORDS_OUT_OPERATOR_FIL_8: 3 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2094 - RECORDS_OUT_OPERATOR_SEL_9: 2094 - RECORDS_OUT_OPERATOR_TS_0: 2100 + RECORDS_OUT_OPERATOR_SEL_9: 3 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 1310720 ALLOCATED_USED_BYTES: 2758 @@ -307,13 +307,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 2094 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 2094 + RECORDS_OUT_OPERATOR_FIL_8: 3 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 2094 - RECORDS_OUT_OPERATOR_SEL_9: 2094 - RECORDS_OUT_OPERATOR_TS_0: 2100 + RECORDS_OUT_OPERATOR_SEL_9: 3 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 CACHE_MISS_BYTES: 0 diff --git ql/src/test/results/clientpositive/llap/partialdhj.q.out ql/src/test/results/clientpositive/llap/partialdhj.q.out index ec0044f..4546d85 100644 --- ql/src/test/results/clientpositive/llap/partialdhj.q.out +++ ql/src/test/results/clientpositive/llap/partialdhj.q.out @@ -110,6 +110,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 32 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator keys: _col0 (type: string) @@ -147,6 +148,7 @@ STAGE PLANS: input vertices: 0 Reducer 3 Statistics: Num rows: 26 Data size: 6942 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -343,6 +345,7 @@ STAGE PLANS: input vertices: 1 Reducer 5 Statistics: Num rows: 26 Data size: 6942 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -364,6 +367,7 @@ STAGE PLANS: input vertices: 1 Map 6 Statistics: Num rows: 32 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/reopt_dpp.q.out ql/src/test/results/clientpositive/llap/reopt_dpp.q.out index 31726f6..e248575 100644 --- ql/src/test/results/clientpositive/llap/reopt_dpp.q.out +++ ql/src/test/results/clientpositive/llap/reopt_dpp.q.out @@ -244,7 +244,7 @@ Stage-0 Output:["_col0"] Filter Operator [FIL_24] (runtime: rows=1 width=8) predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_3] (runtime: rows=2 width=8) + TableScan [TS_3] (runtime: rows=1 width=8) default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] Dynamic Partitioning Event Operator [EVENT_29] (runtime: rows=1 width=8) Group By Operator [GBY_28] (runtime: rows=1 width=8) diff --git ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out index 7518ae6..37143cb 100644 --- ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out +++ ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out @@ -336,7 +336,7 @@ STAGE PLANS: TableScan alias: d filterExpr: ((d_moy = 3) and d_date_sk is not null) (type: boolean) - Statistics: (RUNTIME) Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter 
Operator predicate: ((d_moy = 3) and d_date_sk is not null) (type: boolean) Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/results_cache_2.q.out ql/src/test/results/clientpositive/llap/results_cache_2.q.out index 6f8d52f..b17c58d 100644 --- ql/src/test/results/clientpositive/llap/results_cache_2.q.out +++ ql/src/test/results/clientpositive/llap/results_cache_2.q.out @@ -221,14 +221,14 @@ STAGE PLANS: TableScan alias: src filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 500/500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500/1 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (UDFToDouble(key) < 10.0D) (type: boolean) - Statistics: Num rows: 166/10 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166/1 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sign(value) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 166/10 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166/1 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: double) diff --git ql/src/test/results/clientpositive/llap/runtime_stats_hs2.q.out ql/src/test/results/clientpositive/llap/runtime_stats_hs2.q.out index 76cee83..a94fb41 100644 --- ql/src/test/results/clientpositive/llap/runtime_stats_hs2.q.out +++ ql/src/test/results/clientpositive/llap/runtime_stats_hs2.q.out @@ -123,19 +123,19 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_33] PartitionCols:_col0 - Select Operator [SEL_32] (runtime: rows=5 width=4) + Select Operator [SEL_32] (runtime: rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_31] (runtime: rows=5 width=4) + Filter Operator [FIL_31] (runtime: rows=1 width=4) predicate:((u < 10) and (u > 2)) - TableScan [TS_0] (runtime: rows=8 width=4) + TableScan [TS_0] (runtime: rows=1 width=4) default@tx_n3,tx_n3,Tbl:COMPLETE,Col:COMPLETE,Output:["u"] <-Map 4 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_36] PartitionCols:_col0 - Select Operator [SEL_35] (runtime: rows=3 width=4) + Select Operator [SEL_35] (runtime: rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_34] (runtime: rows=3 width=4) + Filter Operator [FIL_34] (runtime: rows=1 width=4) predicate:((p < 10) and (p > 2)) - TableScan [TS_3] (runtime: rows=5 width=4) + TableScan [TS_3] (runtime: rows=1 width=4) default@px_n0,px_n0,Tbl:COMPLETE,Col:COMPLETE,Output:["p"] diff --git ql/src/test/results/clientpositive/llap/semijoin.q.out ql/src/test/results/clientpositive/llap/semijoin.q.out index f343732..3ac562c 100644 --- ql/src/test/results/clientpositive/llap/semijoin.q.out +++ ql/src/test/results/clientpositive/llap/semijoin.q.out @@ -1996,7 +1996,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Semi Join 1 to 2 keys: 0 key (type: int) @@ -2422,7 +2422,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out index bbaf94b..0c1ad42 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out +++ 
ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out @@ -464,7 +464,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -838,7 +838,7 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1044,7 +1044,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1161,7 +1161,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1275,8 +1275,8 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out index 6d273cd..6a63463 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out +++ ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out @@ -464,7 +464,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -838,7 +838,7 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1044,7 +1044,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1161,7 +1161,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1275,8 +1275,8 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out index cfa87a7..f272ab5 100644 --- ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out +++ ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out @@ -499,6 +499,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Reduce Output Operator key expressions: _col2 (type: int) @@ -633,6 +634,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -768,6 +770,7 @@ STAGE PLANS: input vertices: 1 Map 5 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() diff --git ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out index 
2a03d37..73782cf 100644 --- ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out +++ ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out @@ -197,6 +197,115 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + HybridGraceHashJoin: true + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 4 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: 
VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 4 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Limit Number of rows: 1 diff --git ql/src/test/results/clientpositive/llap/tez_input_counters.q.out ql/src/test/results/clientpositive/llap/tez_input_counters.q.out index f24906f..d346f04 100644 --- ql/src/test/results/clientpositive/llap/tez_input_counters.q.out +++ ql/src/test/results/clientpositive/llap/tez_input_counters.q.out @@ -1581,8 +1581,8 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_GBY_9: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_8: 1000 - RECORDS_OUT_OPERATOR_SEL_7: 1000 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_7: 618 + RECORDS_OUT_OPERATOR_TS_0: 618 Stage-1 LLAP IO COUNTERS: CACHE_MISS_BYTES: 3812 NUM_DECODED_BATCHES: 618 @@ -1612,8 +1612,8 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 8 - RECORDS_OUT_OPERATOR_SEL_9: 8 - RECORDS_OUT_OPERATOR_TS_0: 8 + RECORDS_OUT_OPERATOR_SEL_9: 4 + RECORDS_OUT_OPERATOR_TS_0: 4 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 18 NUM_DECODED_BATCHES: 4 @@ -1715,8 +1715,8 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 240 - RECORDS_OUT_OPERATOR_SEL_9: 240 - RECORDS_OUT_OPERATOR_TS_0: 240 + RECORDS_OUT_OPERATOR_SEL_9: 148 + RECORDS_OUT_OPERATOR_TS_0: 148 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 922 NUM_DECODED_BATCHES: 148 @@ -1825,10 +1825,10 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_GBY_7: 74 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_14: 240 - RECORDS_OUT_OPERATOR_SEL_11: 240 - RECORDS_OUT_OPERATOR_SEL_13: 240 + RECORDS_OUT_OPERATOR_SEL_11: 148 + RECORDS_OUT_OPERATOR_SEL_13: 148 RECORDS_OUT_OPERATOR_SEL_8: 74 - RECORDS_OUT_OPERATOR_TS_0: 240 + RECORDS_OUT_OPERATOR_TS_0: 148 TOTAL_TABLE_ROWS_WRITTEN: 240 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 922 @@ -2346,21 +2346,21 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_INTERMEDIATE_Map_4: 240 
RECORDS_OUT_INTERMEDIATE_Reducer_2: 952 RECORDS_OUT_INTERMEDIATE_Reducer_3: 0 - RECORDS_OUT_OPERATOR_EVENT_30: 309 + RECORDS_OUT_OPERATOR_EVENT_30: 1 RECORDS_OUT_OPERATOR_FS_34: 1 - RECORDS_OUT_OPERATOR_GBY_29: 309 + RECORDS_OUT_OPERATOR_GBY_29: 1 RECORDS_OUT_OPERATOR_GBY_33: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_MERGEJOIN_25: 952 RECORDS_OUT_OPERATOR_RS_11: 952 RECORDS_OUT_OPERATOR_RS_27: 1000 RECORDS_OUT_OPERATOR_RS_32: 240 - RECORDS_OUT_OPERATOR_SEL_26: 1000 - RECORDS_OUT_OPERATOR_SEL_28: 1000 - RECORDS_OUT_OPERATOR_SEL_31: 240 + RECORDS_OUT_OPERATOR_SEL_26: 618 + RECORDS_OUT_OPERATOR_SEL_28: 618 + RECORDS_OUT_OPERATOR_SEL_31: 74 RECORDS_OUT_OPERATOR_SEL_9: 952 - RECORDS_OUT_OPERATOR_TS_0: 1000 - RECORDS_OUT_OPERATOR_TS_3: 240 + RECORDS_OUT_OPERATOR_TS_0: 618 + RECORDS_OUT_OPERATOR_TS_3: 74 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 3812 CACHE_MISS_BYTES: 922 diff --git ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out index dcc7e9d..55c0eb4 100644 --- ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out +++ ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out @@ -499,6 +499,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Reduce Output Operator key expressions: _col2 (type: int) @@ -633,6 +634,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -768,6 +770,7 @@ STAGE PLANS: input vertices: 1 Map 5 Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() diff --git ql/src/test/results/clientpositive/llap/union7.q.out ql/src/test/results/clientpositive/llap/union7.q.out index a157f36..612bf86 100644 --- ql/src/test/results/clientpositive/llap/union7.q.out +++ ql/src/test/results/clientpositive/llap/union7.q.out @@ -128,20 +128,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -128 1 -213 1 -278 1 -369 1 -tst1 1 10 -150 1 -238 1 -66 1 +128 1 146 1 +150 1 +213 1 224 1 +238 1 255 1 273 1 +278 1 311 1 +369 1 401 1 406 1 +66 1 98 1 +tst1 1 diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index b1fa6a7..a9971d4 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -155,10 +155,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:decimal(38,18), 1:decimal(38,18), 2:decimal(38,18), 3:bigint Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,18)), _col1 
(type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: bigint) Execution mode: vectorized, llap @@ -295,10 +294,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:double, 1:double, 2:double, 3:bigint Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap @@ -435,10 +433,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:timestamp, 1:timestamp, 2:double, 3:bigint Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index 46227d2..396afd3 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -95,10 +95,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:int, 1:string Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index ab37a4d..eb4b262 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -106,6 +106,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -240,6 +243,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce 
Vectorization: @@ -448,6 +454,9 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Map 5 Map Operator Tree: TableScan @@ -494,6 +503,9 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -549,6 +561,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: @@ -705,6 +720,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -844,6 +862,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1007,6 +1028,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1158,6 +1182,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1353,6 +1380,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1478,6 +1508,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1623,6 +1656,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1770,6 +1806,9 @@ STAGE 
PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1944,6 +1983,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2_n40 Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-3 Dependency Collection @@ -2169,6 +2211,9 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_between_columns.q.out ql/src/test/results/clientpositive/llap/vector_between_columns.q.out index 1824976..be0e5ca 100644 --- ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_columns.q.out @@ -196,6 +196,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -368,6 +371,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index 6c461f0..cabc2b7 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -157,6 +157,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 input vertices: 1 Map 4 @@ -571,6 +572,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 diff --git 
ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index cd78689..8489ef4 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -415,10 +415,10 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2, 3] + keyColumns: 1:timestamp, 2:string, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [10, 12, 13, 14, 11, 7, 16, 23] + valueColumns: 10:string, 12:string, 13:string, 14:int, 11:string, 7:int, 16:int, 23:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap @@ -700,10 +700,10 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2, 3] + keyColumns: 1:timestamp, 2:string, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [15, 26, 36, 40, 42, 44, 46, 53] + valueColumns: 15:string, 26:string, 36:string, 40:int, 42:string, 44:int, 46:int, 53:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out index 4ae125b..c92eddf 100644 --- ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out @@ -178,6 +178,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 @@ -391,6 +392,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 @@ -562,6 +564,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out index 2fda570..1953826 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -99,14 +99,15 @@ STAGE PLANS: 0 _col0 (type: bigint) 1 _col0 (type: bigint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 2] - smallTableMapping: [2] + projectedOutput: 0:bigint, 2:bigint + smallTableValueMapping: 2:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col2 input vertices: 1 Map 2 @@ -169,10 +170,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out index e609d14..b1cceee 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out @@ -70,10 +70,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:int Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index 4e1698d..b6247ef 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -690,10 +690,9 @@ STAGE 
PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:string, 1:map, 2:array, 3:struct Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) Execution mode: vectorized, llap @@ -730,10 +729,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap Map Vectorization: @@ -769,10 +766,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap Map Vectorization: @@ -953,10 +948,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -1178,10 +1172,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 13503 Data size: 4721072 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap @@ -1321,10 +1315,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 13503 Data size: 7697400 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index b908894..d222460 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -73,6 +73,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 input vertices: 1 Map 2 @@ -254,6 +255,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -399,6 +401,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/llap/vector_date_1.q.out ql/src/test/results/clientpositive/llap/vector_date_1.q.out index 8ff6ddf..432356e 100644 --- ql/src/test/results/clientpositive/llap/vector_date_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_date_1.q.out @@ -128,10 +128,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] + valueColumns: 1:date, 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean, 8:boolean, 9:boolean, 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap @@ -293,10 +293,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] + valueColumns: 1:date, 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean, 8:boolean, 9:boolean, 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap @@ -458,10 +458,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7] + valueColumns: 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized, llap @@ -623,10 +623,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7] + valueColumns: 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized, llap @@ -793,10 +793,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:date Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index fd934f0..e85ea15 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -87,10 +87,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -204,10 +203,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -321,10 +319,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -438,10 +435,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -555,10 +551,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -672,10 +667,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -789,10 +783,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -906,10 +899,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1023,10 +1015,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out index 3170625..4423a12 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out @@ -76,10 +76,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -192,10 +191,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out index 24873a4..a783aa4 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out @@ -65,10 +65,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -181,10 +180,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -297,10 +295,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -413,10 +410,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -529,10 +525,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -645,10 +640,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -761,10 +755,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -877,10 +870,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1004,10 +996,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1120,10 +1111,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1236,10 +1226,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1352,10 +1341,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1468,10 +1456,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1584,10 +1571,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1700,10 +1686,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1816,10 +1801,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out index 8607eed..eec4855 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out @@ -149,10 +149,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(10,5), 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 27 Data size: 2684 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -290,10 +289,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(17,4), 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -442,10 +440,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(18,5) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 54 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -486,10 +483,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(18,5) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 54 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -670,10 +666,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:decimal(11,5) Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(11,5)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index 30a6770..ef94587 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -100,10 +100,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 7:decimal(23,14), 8:decimal(33,14), 9:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized, llap @@ -280,10 +280,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:double, 6:double, 7:bigint, 8:decimal(23,14), 9:decimal(23,14), 10:decimal(33,14), 11:double, 12:double, 13:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint) Execution mode: vectorized, llap @@ -494,10 +494,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:bigint, 6:decimal(16,0), 7:decimal(16,0), 8:decimal(26,0), 9:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)), _col9 (type: bigint) Execution mode: vectorized, llap @@ -693,10 +693,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:double, 6:double, 7:bigint, 8:decimal(16,0), 9:decimal(16,0), 10:decimal(26,0), 11:double, 12:double, 13:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out index a7b2714..3bb1c6f 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out @@ -84,10 +84,9 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + keyColumns: 
4:decimal(25,14), 6:decimal(26,14), 8:decimal(38,13), 10:decimal(38,17), 11:decimal(12,10), 12:int, 13:smallint, 14:tinyint, 15:bigint, 16:boolean, 17:double, 18:float, 19:string, 20:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 455 Data size: 100294 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap @@ -258,10 +257,9 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [6, 8, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + keyColumns: 6:decimal(11,3), 8:decimal(11,3), 10:decimal(21,11), 12:decimal(23,9), 13:decimal(5,3), 14:int, 15:smallint, 16:tinyint, 17:bigint, 18:boolean, 19:double, 20:float, 21:string, 22:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 455 Data size: 100294 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index b66fb9f..aaab592 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -195,10 +195,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(26,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(26,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -472,10 +471,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(26,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(26,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(24,0) Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(24,0)) Execution mode: vectorized, llap @@ -826,10 +825,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1103,10 +1101,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(14,0) Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(14,0)) Execution mode: vectorized, llap @@ -1382,10 +1380,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1661,10 +1658,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(14,0) Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(14,0)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 5e7e8ca..22db3f1 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -601,10 +601,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:decimal(30,10), 1:bigint Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint) Execution mode: vectorized, llap @@ -1220,10 +1219,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:decimal(30,10), 1:bigint Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out index eb4a588..6fbd72e 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out @@ -74,10 +74,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:decimal(11,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap @@ -191,10 +191,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(10,0) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap @@ -336,10 +336,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(11,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap @@ -453,10 +453,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(10,0) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap @@ -598,10 +598,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:decimal(11,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap @@ -715,10 +715,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(10,0) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out index bb0cbfc..32ff100 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out @@ -78,10 +78,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(21,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + valueColumns: 3:decimal(21,0), 4:decimal(22,1), 5:decimal(23,2), 6:decimal(24,3), 7:decimal(21,0), 8:decimal(21,0), 9:decimal(21,0), 10:decimal(21,0), 11:decimal(21,0), 12:decimal(21,0), 13:decimal(21,0), 14:decimal(21,0) Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized, llap @@ -246,10 +246,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(21,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + valueColumns: 4:decimal(21,0), 5:decimal(22,1), 6:decimal(23,2), 7:decimal(24,3), 8:decimal(25,4), 9:decimal(21,0), 10:decimal(21,0), 11:decimal(21,0), 12:decimal(21,0), 13:decimal(21,0), 14:decimal(21,0), 15:decimal(22,1), 16:decimal(23,2), 17:decimal(24,3), 18:decimal(25,4), 19:decimal(21,0), 20:decimal(21,0), 21:decimal(21,0), 22:decimal(21,0) Statistics: Num rows: 1 Data 
size: 2240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized, llap @@ -441,10 +441,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(21,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + valueColumns: 3:decimal(21,0), 4:decimal(21,0), 5:decimal(21,0), 6:decimal(21,0), 7:decimal(21,0), 8:decimal(21,0), 9:decimal(21,0), 10:decimal(21,0), 11:decimal(21,0), 12:decimal(21,0), 13:decimal(21,0), 14:decimal(21,0), 15:decimal(21,0), 16:decimal(21,0), 17:decimal(21,0), 18:decimal(21,0), 19:decimal(22,1), 20:decimal(23,2), 21:decimal(24,3), 22:decimal(25,4), 23:decimal(26,5), 24:decimal(27,6), 25:decimal(28,7), 26:decimal(29,8), 27:decimal(30,9), 28:decimal(31,10), 29:decimal(32,11), 30:decimal(33,12), 31:decimal(34,13), 32:decimal(35,14), 33:decimal(36,15), 34:decimal(37,16) Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col30 (type: decimal(35,14)), _col31 (type: decimal(36,15)), _col32 (type: decimal(37,16)) Execution mode: vectorized, llap @@ -625,10 +625,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(30,9) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4] + valueColumns: 4:decimal(30,9) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type:
decimal(30,9)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out index a35f6fe..4647bcb 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out @@ -108,10 +108,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:decimal(10,4), 2:decimal(15,8) Statistics: Num rows: 30 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(10,4)), _col2 (type: decimal(15,8)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index f3d2e6f..f7219f0 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -2320,10 +2320,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:decimal(30,10), 2:bigint Statistics: Num rows: 18 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint) Execution mode: vectorized, llap @@ -2387,10 +2387,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 6, 1] + valueColumns: 4:decimal(38,18), 6:decimal(38,28), 1:decimal(30,10) Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(38,28)), _col3 (type: decimal(30,10)) Reducer 3 @@ -3261,10 +3261,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:double, 2:double, 3:bigint Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Execution mode: 
vectorized, llap @@ -3425,10 +3425,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:double, 2:double, 3:bigint Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap @@ -3668,10 +3668,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(20,10) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap @@ -3799,10 +3798,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(20,10) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap @@ -3930,10 +3928,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -6301,10 +6298,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:decimal(25,3), 2:bigint Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(25,3)), _col2 (type: bigint) Execution mode: vectorized, llap @@ -6368,10 +6365,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 6, 1] + valueColumns: 4:decimal(38,16), 6:decimal(38,26), 1:decimal(25,3) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(38,16)), _col2 (type: decimal(38,26)), _col3 (type: decimal(25,3)) Reducer 3 @@ -7242,10 +7239,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:double, 2:double, 3:bigint Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap @@ -7406,10 +7403,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:double, 2:double, 3:bigint Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap @@ -7649,10 +7646,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(15,3) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(15,3)) Execution mode: vectorized, llap @@ -7780,10 +7776,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(15,3) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(15,3)) Execution mode: vectorized, llap @@ -7911,10 +7906,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out new file mode 100644 index 0000000..6ddcef6 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out @@ -0,0 +1,1228 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + 
Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: llap + LLAP 
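For reference, the six expected rows above follow directly from FULL OUTER JOIN semantics and can be reproduced without the ORC staging tables. A minimal standalone sketch follows, with the row contents inferred from the expected output (tjoin1.txt and tjoin2.txt are not included in this excerpt, and the tjoin2 rnum values below are invented):

-- Hedged repro sketch: data inferred from the expected q.out rows above;
-- tjoin2 rnum values are assumptions, and the ORC staging steps are skipped.
create table tjoin1 (rnum int, c1 int, c2 int);
create table tjoin2 (rnum int, c1 int, c2 char(2));
insert into tjoin1 values (0, 10, 15), (1, 20, 25), (2, null, 50);
insert into tjoin2 values (0, 10, 'BB'), (1, 15, 'DD'), (2, null, 'EE'), (3, 10, 'FF');
-- c1 = 10 matches twice (BB and FF); every unmatched row from either side
-- survives with NULL padding, and NULL join keys never match, so the query
-- yields exactly the six rows listed in the expected output.
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2
from tjoin1 full outer join tjoin2 on (tjoin1.c1 = tjoin2.c1);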
IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution 
mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on 
( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort 
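The plans alternate between enabled: false (enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]) and enabled: true, so the driving .q file evidently re-runs the same query while toggling vectorization. A minimal sketch of that shape, assuming the set-statement placement (the .q file itself is not part of this excerpt):

-- Hedged sketch of the driving test-file structure; statement order is an
-- assumption, since the .q file is not shown in this diff excerpt.
set hive.vectorized.execution.enabled=false;
explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2
from tjoin1 full outer join tjoin2 on (tjoin1.c1 = tjoin2.c1);

set hive.vectorized.execution.enabled=true;
explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2
from tjoin1 full outer join tjoin2 on (tjoin1.c1 = tjoin2.c1);

Note that even in the enabled runs, the Reducer 2 merge join still reports Vectorizing MergeJoin Supported IS false, so only the map-side operators actually vectorize in these plans.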
order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce 
Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: 
_col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 714 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, 
tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 
(type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 +0 10 15 BB +0 10 15 FF +1 20 25 NULL +2 NULL 50 NULL +NULL NULL NULL DD +NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution 
mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 +0 10 15 +0 10 15 +1 20 25 +2 NULL 50 +NULL NULL NULL +NULL NULL NULL
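Reviewer note (inter-file commentary, not part of any generated q.out): the new golden file below, vector_fullouter_mapjoin_1_fast.q.out (3945 lines per the hunk header), records the FAST hash-table variant of the vectorized FULL OUTER MapJoin; its plans show VectorMapJoinFullOuterLongOperator with hashTableImplementationType: FAST and a fullOuterSmallTableKeyMapping entry that projects the small-table key alongside the big-table columns. A minimal HiveQL sketch of the scenario it exercises follows; the query and table names are taken verbatim from the golden output, while the SET lines are assumptions inferred from the "nativeConditionsMet" entries in the plans, since the q file's own SET commands are not visible in this diff.

-- Sketch only: reproduces the big/small FULL OUTER MapJoin shape covered by
-- the golden file below, once its CREATE TABLE / LOAD DATA steps have run.
-- Assumed session settings (inferred from nativeConditionsMet in the plans):
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.mapjoin.native.enabled=true;
SET hive.mapjoin.optimized.hashtable=true;

-- The plan should report VectorMapJoinFullOuterLongOperator (bigint key)
-- rather than a non-vectorized Merge Join Operator.
EXPLAIN VECTORIZATION DETAIL
SELECT b.key, s.key, s.s_date
FROM fullouter_long_big_1a b
FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
ORDER BY b.key;

diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out new file mode 100644 index 0000000..97d4e2a --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out @@ -0,0 +1,3945 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE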
+PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE 
[(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull 
compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + MergeJoin 
Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 
2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 
2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:bigint, 1:date + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 2:bigint, 0:bigint, 1:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 2:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 
2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 
7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull 
s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default 
+PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE 
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct]
+ Select Operator
+ expressions: key (type: smallint)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: smallint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:smallint
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: key:smallint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct]
+ Select Operator
+ expressions: key (type: smallint), s_timestamp (type: timestamp)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: smallint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:smallint
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 1:timestamp
+ Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: timestamp)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:smallint, s_timestamp:timestamp
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:timestamp
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: smallint)
+ 1 KEY.reducesinkkey0 (type: smallint)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:smallint
+ bigTableRetainColumnNums: [0, 1]
+ bigTableValueColumns: 0:smallint, 1:timestamp
+ className: VectorMapJoinFullOuterLongOperator
+ fullOuterSmallTableKeyMapping: 0 -> 2
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true
+ projectedOutput: 2:smallint, 0:smallint, 1:timestamp
+ hashTableImplementationType: FAST
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 2:smallint
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:smallint, 1:timestamp
+ Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: smallint), _col2 (type: timestamp)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1b
+PREHOOK: Input: default@fullouter_long_small_1b
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1b
+POSTHOOK: Input: default@fullouter_long_small_1b
+#### A masked pattern was here ####
+-25394 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+32030 32030 2101-09-09 07:35:05.145
+NULL -14172 1918-09-13 11:44:24.496926711
+NULL -14172 2355-01-14 23:23:34
+NULL -14172 2809-06-07 02:10:58
+NULL -15361 2219-09-15 20:15:03.000169887
+NULL -15361 2434-08-13 20:37:07.000172979
+NULL -15427 2023-11-09 19:31:21
+NULL -15427 2046-06-07 22:58:40.728
+NULL -15427 2355-01-08 12:34:11.617
+NULL -19167 2230-12-22 20:25:39.000242111
+NULL -19167 2319-08-26 11:07:11.268
+NULL -20517 2233-12-20 04:06:56.666522799
+NULL -20517 2774-06-23 12:04:06.5
+NULL -20824 2478-11-05 00:28:05
+NULL -22422 1949-03-13 00:07:53.075
+NULL -22422 2337-07-19 06:33:02.000353352
+NULL -22422 2982-12-28 06:30:26.000883228
+NULL -23117 2037-01-05 21:52:30.685952759
+NULL -24775 2035-03-26 08:11:23.375224153
+NULL -24775 2920-08-06 15:58:28.261059449
+NULL -26998 2268-08-04 12:48:11.848006292
+NULL -26998 2428-12-26 07:53:45.96925825
+NULL -26998 2926-07-18 09:02:46.077
+NULL -29600 2333-11-02 15:06:30
+NULL -30059 2269-05-04 21:23:44.000339209
+NULL -30059 2420-12-10 22:12:30
+NULL -30059 2713-10-13 09:28:49
+NULL -30306 2619-05-24 10:35:58.000774018
+NULL -4279 2214-09-10 03:53:06
+NULL -4279 2470-08-12 11:21:14.000955747
+NULL -7373 2662-10-28 12:07:02.000526564
+NULL -7624 2219-12-03 17:07:19
+NULL -7624 2289-08-28 00:14:34
+NULL -7624 2623-03-20 03:18:45.00006465
+NULL -8087 2550-06-26 23:57:42.588007617
+NULL -8087 2923-07-02 11:40:26.115
+NULL -8435 2642-02-07 11:45:04.353231638
+NULL -8435 2834-12-06 16:38:18.901
+NULL -8624 2120-02-15 15:36:40.000758423
+NULL -8624 2282-03-28 07:58:16
+NULL -8624 2644-05-04 04:45:07.839
+NULL 10553 2168-05-05 21:10:59.000152113
+NULL 11232 2038-04-06 14:53:59
+NULL 11232 2507-01-27 22:04:22.49661421
+NULL 11232 2533-11-26 12:22:18
+NULL 13598 2421-05-20 14:18:31.000264698
+NULL 13598 2909-06-25 23:22:50
+NULL 14865 2079-10-06 16:54:35.117
+NULL 14865 2220-02-28 03:41:36
+NULL 14865 2943-03-21 00:42:10.505
+NULL 17125 2236-07-14 01:54:40.927230276
+NULL 17125 2629-11-15 15:34:52
+NULL 21181 2253-03-12 11:55:48.332
+NULL 21181 2434-02-20 00:46:29.633
+NULL 21436 2526-09-22 23:44:55
+NULL 21436 2696-05-08 05:19:24.112
+NULL 24870 2752-12-26 12:32:23.03685163
+NULL 2632 2561-12-15 15:42:27
+NULL 26484 1919-03-04 07:32:37.519
+NULL 26484 2953-03-10 02:05:26.508953676
+NULL 2748 2298-06-20 21:01:24
+NULL 2748 2759-02-13 18:04:36.000307355
+NULL 2748 2862-04-20 13:12:39.482805897
+NULL 29407 2385-12-14 06:03:39.597
+NULL 3198 2223-04-14 13:20:49
+NULL 3198 2428-06-13 16:21:33.955
+NULL 3198 2736-12-20 03:59:50.343550301
+NULL 4510 2293-01-17 13:47:41.00001006
+NULL 4510 2777-03-24 03:44:28.000169723
+NULL NULL 2124-05-07 15:01:19.021
+NULL NULL 2933-06-20 11:48:09.000839488
+NULL NULL 2971-08-07 12:02:11.000948152
+NULL NULL NULL
+PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1c
+POSTHOOK:
query: CREATE TABLE fullouter_long_big_1c(key int, b_string string)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1c
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_big_1c
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_long_big_1c
+PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18))
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1c
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18))
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1c
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_small_1c
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_long_small_1c
+PREHOOK: query: analyze table fullouter_long_big_1c compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1c
+PREHOOK: Output: default@fullouter_long_big_1c
+POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1c
+POSTHOOK: Output: default@fullouter_long_big_1c
+PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@fullouter_long_big_1c
+PREHOOK: Output: default@fullouter_long_big_1c
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@fullouter_long_big_1c
+POSTHOOK: Output: default@fullouter_long_big_1c
+#### A masked pattern was here ####
+PREHOOK: query: analyze table fullouter_long_small_1c compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_small_1c
+PREHOOK: Output: default@fullouter_long_small_1c
+POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_small_1c
+POSTHOOK: Output: default@fullouter_long_small_1c
+PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@fullouter_long_small_1c
+PREHOOK: Output: default@fullouter_long_small_1c
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@fullouter_long_small_1c
+POSTHOOK: Output: default@fullouter_long_small_1c
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct]
+ Select Operator
+ expressions: key (type: int), b_string (type: string)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 1:string
+ Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:int, b_string:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct]
+ Select Operator
+ expressions: key (type: int), s_decimal (type: decimal(38,18))
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 1:decimal(38,18)
+ Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(38,18))
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:int, s_decimal:decimal(38,18)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,18)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, string]
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: int)
+ 1 KEY.reducesinkkey0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:int
+ bigTableRetainColumnNums: [0, 1]
+ bigTableValueColumns: 0:int, 1:decimal(38,18)
+ className: VectorMapJoinFullOuterLongOperator
+ fullOuterSmallTableKeyMapping: 0 -> 2
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true
+ projectedOutput: 2:int, 3:string, 0:int, 1:decimal(38,18)
+ smallTableValueMapping: 3:string
+ hashTableImplementationType: FAST
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 2:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 3:string, 0:int, 1:decimal(38,18)
+ Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18))
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3]
+ Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1c
+PREHOOK: Input: default@fullouter_long_small_1c
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1c
+POSTHOOK: Input: default@fullouter_long_small_1c
+#### A masked pattern was here ####
+-1437463633 JU NULL NULL
+-1437463633 NULL NULL NULL
+-1437463633 SOWDWMS NULL NULL
+-1437463633 TKTKGVGFW NULL NULL
+-1437463633 YYXPPCH NULL NULL
+1725068083 MKSCCE NULL NULL
+1928928239 AMKTIWQ NULL NULL
+1928928239 NULL NULL NULL
+1928928239 NULL NULL NULL
+1928928239 VAQHVRI NULL NULL
+NULL ABBZ NULL NULL
+NULL NULL -1093006502 -69.556658280000000000
+NULL NULL -1197550983 -0.558879692200000000
+NULL NULL -1197550983 0.100000000000000000
+NULL NULL -1197550983 71852.833867441261300000
+NULL NULL -1250662632 -544.554649000000000000
+NULL NULL -1250662632 5454127198.951479000000000000
+NULL NULL -1250662632 93104.000000000000000000
+NULL NULL -1264372462 -6993985240226.000000000000000000
+NULL NULL -1264372462 -899.000000000000000000
+NULL NULL -1264372462 0.883000000000000000
+NULL NULL -1490239076 92253.232096000000000000
+NULL NULL -1681455031 -11105.372477000000000000
+NULL NULL -1681455031 -6.454300000000000000
+NULL NULL -1740848088 -9.157000000000000000
+NULL NULL -1740848088 0.506394259000000000
+NULL NULL -1740848088 901.441000000000000000
+NULL NULL -2048404259 -0.322296044625100000
+NULL NULL -2048404259 3939387044.100000000000000000
+NULL NULL -2123273881 -55.891980000000000000
+NULL NULL -2123273881 3.959000000000000000
+NULL NULL -243940373 -583.258000000000000000
+NULL NULL -243940373 -97176129669.654953000000000000
+NULL NULL -369457052 560.119078830904550000
+NULL NULL -369457052 7.700000000000000000
+NULL NULL -424713789 0.480000000000000000
+NULL NULL -466171792 0.000000000000000000
+NULL NULL -466171792 4227.534400000000000000
+NULL NULL -466171792 69.900000000000000000
+NULL NULL -477147437 6.000000000000000000
+NULL NULL -793950320 -0.100000000000000000
+NULL NULL -793950320 -16.000000000000000000
+NULL NULL -934092157 -7843850349.571300380000000000
+NULL NULL -99948814 -38076694.398100000000000000
+NULL NULL -99948814 -96386.438000000000000000
+NULL NULL 1039864870 0.700000000000000000
+NULL NULL 1039864870 94.040000000000000000
+NULL NULL 1039864870 987601.570000000000000000
+NULL NULL 1091836730 -5017.140000000000000000
+NULL NULL 1091836730 0.020000000000000000
+NULL NULL 1242586043 -4.000000000000000000
+NULL NULL 1242586043 -749975924224.630000000000000000
+NULL NULL 1242586043 71.148500000000000000
+NULL NULL 1479580778 92077343080.700000000000000000
+NULL NULL 150678276 -8278.000000000000000000
+NULL NULL 150678276 15989394.843600000000000000
+NULL NULL 1519948464 152.000000000000000000
+NULL NULL 1561921421 -5.405000000000000000
+NULL NULL 1561921421 53050.550000000000000000
+NULL NULL 1585021913 -5762331.066971120000000000
+NULL NULL 1585021913 607.227470000000000000
+NULL NULL 1585021913 745222.668089540000000000
+NULL NULL 1719049112 -7888197.000000000000000000
+NULL NULL 1738753776 -99817635066320.241600000000000000
+NULL NULL 1738753776 1525.280459649262000000
+NULL NULL 1755897735 -39.965207000000000000
+NULL NULL 1785750809 47443.115000000000000000
+NULL NULL 1801735854 -1760956929364.267000000000000000
+NULL NULL 1801735854 -438541294.700000000000000000
+NULL NULL 1816559437 -1035.700900000000000000
+NULL NULL 1909136587 -8610.078036935181000000
+NULL NULL 1909136587 181.076815359440000000
+NULL NULL 193709887 -0.566300000000000000
+NULL NULL 193709887 -19889.830000000000000000
+NULL NULL 193709887 0.800000000000000000
+NULL NULL 284554389 5.727146000000000000
+NULL NULL 294598722 -3542.600000000000000000
+NULL NULL 294598722 -9377326244.444000000000000000
+NULL NULL 448130683 -4302.485366846491000000
+NULL NULL 452719211 3020.293893074463600000
+NULL NULL 452719211 83003.437220000000000000
+NULL NULL 466567142 -58810.605860000000000000
+NULL NULL 466567142 -9763217822.129028000000000000
+NULL NULL 466567142 196.578529539858400000
+NULL NULL 560745412 678.250000000000000000
+NULL NULL 698032489 -330457.429262583900000000
+NULL NULL 891262439 -0.040000000000000000
+NULL NULL 90660785 -4564.517185000000000000
+NULL NULL 90660785 12590.288613000000000000
+NULL NULL NULL 1.089120893565337000
+NULL NULL NULL 4.261652270000000000
+NULL NULL NULL 682070836.264960300000000000
+PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1d
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1d
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_big_1d
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_long_big_1d
+PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1d
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1d
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE
fullouter_long_small_1d
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_small_1d
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_long_small_1d
+PREHOOK: query: analyze table fullouter_long_big_1d compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1d
+PREHOOK: Output: default@fullouter_long_big_1d
+POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1d
+POSTHOOK: Output: default@fullouter_long_big_1d
+PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@fullouter_long_big_1d
+PREHOOK: Output: default@fullouter_long_big_1d
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@fullouter_long_big_1d
+POSTHOOK: Output: default@fullouter_long_big_1d
+#### A masked pattern was here ####
+PREHOOK: query: analyze table fullouter_long_small_1d compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_small_1d
+PREHOOK: Output: default@fullouter_long_small_1d
+POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_small_1d
+POSTHOOK: Output: default@fullouter_long_small_1d
+PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@fullouter_long_small_1d
+PREHOOK: Output: default@fullouter_long_small_1d
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@fullouter_long_small_1d
+POSTHOOK: Output: default@fullouter_long_small_1d
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct]
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: key:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct]
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: key:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY.reducesinkkey0:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: int)
+ 1 KEY.reducesinkkey0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:int
+ bigTableRetainColumnNums: [0]
+ bigTableValueColumns: 0:int
+ className: VectorMapJoinFullOuterLongOperator
+ fullOuterSmallTableKeyMapping: 0 -> 1
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true
+ projectedOutput: 1:int, 0:int
+ hashTableImplementationType: FAST
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 1:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:int
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1d
+PREHOOK: Input: default@fullouter_long_small_1d
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1d
+POSTHOOK: Input: default@fullouter_long_small_1d
+#### A masked pattern was here ####
+-1780951928 NULL
+-2038654700 -2038654700
+-670834064 NULL
+-702028721 NULL
+-702028721 NULL
+-702028721 NULL
+-814597051 NULL
+-814597051 NULL
+-814597051 NULL
+-814597051 NULL
+NULL -1003639073
+NULL -1014271154
+NULL -1036083124
+NULL -1210744742
+NULL -1323620496
+NULL -1379355738
+NULL -1712018127
+NULL -1792852276
+NULL -1912571616
+NULL -497171161
+NULL -683339273
+NULL -707688773
+NULL -747044796
+NULL -894799664
+NULL -932176731
+NULL 103640700
+NULL 1164387380
+NULL 1372592319
+NULL 1431997749
+NULL 1614287784
+NULL 162858059
+NULL 1635405412
+NULL 1685473722
+NULL 1780951928
+NULL 1825107160
+NULL 1831520491
+NULL 1840266070
+NULL 1997943409
+NULL 2119085509
+NULL 246169862
+NULL 260588085
+NULL 41376947
+NULL 436878811
+NULL 533298451
+NULL 670834064
+NULL 699007128
+NULL 699863556
+NULL NULL
+NULL NULL
+NULL NULL
+PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_multikey_big_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_multikey_big_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_multikey_big_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_multikey_big_1a_txt
+PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_multikey_big_1a_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_multikey_big_1a
+POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_multikey_big_1a_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_multikey_big_1a
+POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ]
+POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt
+POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt
+PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input:
default@fullouter_multikey_big_1a_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_multikey_big_1a_nonull
+POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull
+POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ]
+POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_multikey_small_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_multikey_small_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_multikey_small_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_multikey_small_1a_txt
+PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_multikey_small_1a_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_multikey_small_1a
+POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_multikey_small_1a_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_multikey_small_1a
+POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ]
+POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt
+POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt
+PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_multikey_small_1a_nonull
+POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull
+POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ]
+POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ]
+PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a
+PREHOOK: Output: default@fullouter_multikey_big_1a
+POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1a
+POSTHOOK: Output: default@fullouter_multikey_big_1a
+PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@fullouter_multikey_big_1a
+PREHOOK: Output: default@fullouter_multikey_big_1a
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@fullouter_multikey_big_1a
+POSTHOOK: Output: default@fullouter_multikey_big_1a
+#### A masked pattern was here ####
+PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a_nonull
+PREHOOK: Output: default@fullouter_multikey_big_1a_nonull
+POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull
+POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull
+PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@fullouter_multikey_big_1a_nonull
+PREHOOK: Output: default@fullouter_multikey_big_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull
+POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull
+#### A masked pattern was here ####
+PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_small_1a
+PREHOOK: Output: default@fullouter_multikey_small_1a
+POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_small_1a
+POSTHOOK: Output: default@fullouter_multikey_small_1a
+PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@fullouter_multikey_small_1a
+PREHOOK: Output: default@fullouter_multikey_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@fullouter_multikey_small_1a
+POSTHOOK: Output: default@fullouter_multikey_small_1a
+#### A masked pattern was here ####
+PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_small_1a_nonull
+PREHOOK: Output: default@fullouter_multikey_small_1a_nonull
+POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull
+POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull
+PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@fullouter_multikey_small_1a_nonull
+PREHOOK: Output: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull
+POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct]
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+
Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumns: 0:smallint, 1:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key0:smallint, key1:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct]
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumns: 0:smallint, 1:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key0:smallint, key1:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint]
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:smallint, 1:int
+ bigTableRetainColumnNums: [0, 1]
+ bigTableValueColumns: 0:smallint, 1:int
+ className: VectorMapJoinFullOuterMultiKeyOperator
+ fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true
+ projectedOutput: 2:smallint, 3:int, 0:smallint, 1:int
+ hashTableImplementationType: FAST
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 2:smallint, 3:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:smallint, 1:int
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: smallint), _col3 (type: int)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3]
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a
+PREHOOK: Input: default@fullouter_multikey_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1a
+POSTHOOK: Input: default@fullouter_multikey_small_1a
+#### A masked pattern was here ####
+-17582 -1730236061 NULL NULL
+-17582 1082230084 NULL NULL
+-17582 267529350 -17582 267529350
+-17582 827141667 NULL NULL
+-17582 9637312 NULL NULL
+-18222 -1969080993 NULL NULL
+-6131 -1969080993 -6131 -1969080993
+1499 371855128 NULL NULL
+22767 -1969080993 NULL NULL
+3556 -1969080993 NULL NULL
+3556 NULL NULL NULL
+NULL 1082230084 NULL NULL
+NULL NULL -11868 -3536499
+NULL NULL -11868 -915441041
+NULL NULL -11868 1052120431
+NULL NULL -11868 1318114822
+NULL NULL -11868 1456809245
+NULL NULL -11868 1658440922
+NULL NULL -11868 930596435
+NULL NULL -11868 97203778
+NULL NULL -12252 1956403781
+NULL NULL -12252 964377504
+NULL NULL -15212 -2055239583
+NULL NULL -17788 -1361776766
+NULL NULL -17788 -738743861
+NULL NULL -17788 -872691214
+NULL NULL -17788 528419995
+NULL NULL -1787 -63842445
+NULL NULL -20125 -1995259010
+NULL NULL -20900 1078466156
+NULL NULL -22311 -2055239583
+NULL NULL -23457 -63842445
+NULL NULL -2407 1078466156
+NULL NULL -24206 -1456409156
+NULL NULL -24206 641361618
+NULL NULL -26894 -63842445
+NULL NULL -28129 -2055239583
+NULL NULL -28137 -63842445
+NULL NULL -28313 -706104224
+NULL NULL -28313 51228026
+NULL NULL -28313 837320573
+NULL NULL -4117 -1386947816
+NULL NULL -5734 1078466156
+NULL NULL -6061 -586336015
+NULL NULL -7386 -1635102480
+NULL NULL -7386 -2112062470
+NULL NULL -7386 100736776
+NULL NULL -980 -270600267
+NULL NULL -980 -333603940
+NULL NULL -980 -465544127
+NULL NULL -980 -801821285
+NULL NULL -980 1310479628
+NULL NULL -980 2009785365
+NULL NULL -980 356970043
+NULL NULL -980 628784462
+NULL NULL -980 712692345
+NULL NULL 11460 1078466156
+NULL NULL 12089 -63842445
+NULL NULL 13672 -63842445
+NULL NULL 14400 -825652334
+NULL NULL 15061 -63842445
+NULL NULL 15404 1078466156
+NULL NULL 16166 931172175
+NULL NULL 16696 -63842445
+NULL NULL 20156 -1618478138
+NULL NULL 20156 1165375499
+NULL NULL 20156 1855042153
+NULL NULL 20156 963883665
+NULL NULL 20969 -1995259010
+NULL NULL 21186 -586336015
+NULL NULL 22934 -1695419330
+NULL NULL 23015 -1893013623
+NULL NULL 23015 -217613200
+NULL NULL 23015 -252525791
+NULL NULL 23015 -276888585
+NULL NULL 23015 -696928205
+NULL NULL 23015 -893234501
+NULL NULL 23015 258882280
+NULL NULL 23015 564751472
+NULL NULL 26738 -2055239583
+NULL NULL 26944 -1995259010
+NULL NULL 30353 -1007182618
+NULL NULL 30353 -1011627089
+NULL NULL 30353 -1507157031
+NULL NULL 30353 105613996
+NULL NULL 30353 1241923267
+NULL NULL 30353 1364268303
+NULL NULL 30353 2044473567
+NULL NULL 31443 -1968665833
+NULL NULL 3412 -1196037018
+NULL NULL 3412 -1249487623
+NULL NULL 3412 -2081156563
+NULL NULL 3412 -2132472060
+NULL NULL 3412 1253976194
+NULL NULL 3890 1411429004
+NULL NULL 4586 -586336015
+NULL NULL 4779 -1995259010
+NULL NULL 4902 1078466156
+NULL NULL 5957 -1995259010
+NULL NULL 8177 -1995259010
+NULL NULL NULL 1082230084
+NULL NULL NULL NULL
+NULL NULL NULL NULL
+PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a_nonull
+PREHOOK: Input: default@fullouter_multikey_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND
b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON 
b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b 
FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE 
[(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 
(CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP 
IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string, VALUE._col0:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2, 3] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string, 3:decimal(38,18) + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 4, 1 -> 5, 2 -> 6 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 4:timestamp, 5:smallint, 6:string, 0:timestamp, 1:smallint, 2:string, 3:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 4:timestamp, 5:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 6:string, 0:timestamp, 1:smallint, 2:string, 3:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false 
+ usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:25.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:36.000423663 -24459 AO -821445414.457971200000000000 +NULL 
NULL NULL 1957-02-01 14:00:29.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 
74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL 
NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE 
TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE 
[(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: 
default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:date, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [string] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:string, 1:date, 2:timestamp + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 3:string, 0:string, 1:date, 2:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 3:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:string, 1:date, 2:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, 
s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 
2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 
+NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 0000000..073a293 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,3945 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE 
TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics 
for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + 
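An editorial aside, hedged: this first EXPLAIN of the bigint query keeps the join in a shuffle reducer, and the Merge Join shown below is explicitly not vectorized ("Vectorizing MergeJoin Supported IS false"); the second EXPLAIN of the identical query instead converts it to a reduce-side vectorized Map Join reporting "DynamicPartitionHashJoin: true". The toggle between the two plan shapes is not shown in this diff; a plausible sketch, with both settings treated as assumptions:
-- assumed toggle between the two plan shapes of the same query
SET hive.auto.convert.join=true;
SET hive.optimize.dynamic.partition.hashjoin=true;  -- enables the reduce-side MapJoin conversion
EXPLAIN VECTORIZATION DETAIL
SELECT b.key, s.key, s.s_date
FROM fullouter_long_big_1a b
FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
ORDER BY b.key;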
featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b 
FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + 
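A brief semantics note ahead of the result sets that follow, with a self-contained example (standard SQL behavior, not specific to this patch): equality never matches NULL keys, so a FULL OUTER JOIN emits every NULL-keyed row unmatched from its own side. That is why the output below pairs each small-table NULL key with a NULL big-table key, and shows one extra all-NULL row contributed by the big table's own NULL key.
-- each side contributes one unmatched row, because NULL = NULL evaluates to NULL
SELECT b.key, s.key
FROM (SELECT CAST(NULL AS BIGINT) AS key FROM fullouter_long_big_1a LIMIT 1) b
FULL OUTER JOIN (SELECT CAST(NULL AS BIGINT) AS key FROM fullouter_long_small_1a LIMIT 1) s
ON b.key = s.key;
-- returns two rows, both (NULL, NULL): one from side b, one from side s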
Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:bigint, 1:date + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 2:bigint, 0:bigint, 1:date + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 2:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint, 1:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON 
b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL 
+-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL 
-327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: 
default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE 
Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:timestamp + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and 
Supports Key Types IS true + projectedOutput: 2:smallint, 0:smallint, 1:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 2:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:smallint, 1:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 
+NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c 
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + 
native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:decimal(38,18) + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 2:int, 3:string, 0:int, 1:decimal(38,18) + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 2:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:string, 0:int, 1:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM 
fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 
-1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: 
default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] +
dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 1:int, 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +
valueColumns: 0:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: 
LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated 
by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: 
default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 +
scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 2:smallint, 3:int, 0:smallint, 1:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className:
VectorReduceSinkObjectHashOperator + keyColumns: 2:smallint, 3:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:smallint, 1:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 
1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL 
-20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 
+NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL 
-17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt 
+POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + 
className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, 
KEY.reducesinkkey2:string, VALUE._col0:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2, 3] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string, 3:decimal(38,18) + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 4, 1 -> 5, 2 -> 6 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 4:timestamp, 5:smallint, 6:string, 0:timestamp, 1:smallint, 2:string, 3:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 4:timestamp, 5:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 6:string, 0:timestamp, 1:smallint, 2:string, 3:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:25.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:36.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:29.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL 
NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL 
NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 
2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: 
default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A 
masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: 
VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:date, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [string] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + 
bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:string, 1:date, 2:timestamp + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 3:string, 0:string, 1:date, 2:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 3:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:string, 1:date, 2:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: 
default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 
12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL 
VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out new file mode 100644 index 0000000..d955a83 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out @@ -0,0 +1,3923 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
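The result blocks above illustrate standard FULL OUTER JOIN null semantics: a NULL join key never satisfies the equality predicate, so NULL-keyed rows from either side surface unmatched, padded with NULLs from the other side (and dropping the NULL-keyed rows from one side, as in the *_nonull variants, removes exactly those unmatched rows). A minimal standalone sketch of that behavior, with hypothetical table names that are not part of this test suite:

CREATE TABLE null_demo_big (key STRING);
CREATE TABLE null_demo_small (key STRING, s_date DATE);
INSERT INTO null_demo_big VALUES ('FTWURVH'), (NULL);
INSERT INTO null_demo_small VALUES ('FTWURVH', DATE '1976-03-10'), (NULL, DATE '1985-01-22');
SELECT b.key, s.key, s.s_date
FROM null_demo_big b
FULL OUTER JOIN null_demo_small s ON b.key = s.key;
-- Produces three rows (order depends on null-ordering defaults):
--   FTWURVH  FTWURVH  1976-03-10    matched on equal keys
--   NULL     NULL     NULL          big-side NULL key, unmatched
--   NULL     NULL     1985-01-22    small-side NULL key, unmatched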
database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE 
[(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull 
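Each table in this file is analyzed twice: once for basic statistics (row count, raw data size) and once for column statistics. The plans that follow report "Basic stats: COMPLETE Column stats: NONE", so the Num rows / Data size estimates the planner uses to pick the small, broadcastable join side come from the basic stats. The pattern, per table:

ANALYZE TABLE fullouter_long_small_1a COMPUTE STATISTICS;
ANALYZE TABLE fullouter_long_small_1a COMPUTE STATISTICS FOR COLUMNS;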
compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + 
sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN 
fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + 
dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint, col 1:date + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: 
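Two plans are captured above for the same query. The first compiles to a reduce-side Merge Join whose vectorization is rejected outright ("Vectorizing MergeJoin Supported IS false"). The second compiles to a reduce-side hash join ("DynamicPartitionHashJoin: true") that vectorizes through the row-mode pass-through operator, VectorMapJoinOperator, with "native: false" only because hive.vectorized.execution.mapjoin.native.enabled is off — the "optimized_passthru" configuration this golden file covers. The .q script is not part of this hunk; presumably it flips settings along these lines between the two runs (hypothetical toggles — only the condition names are taken from the plan output):

SET hive.vectorized.execution.enabled=true;
SET hive.mapjoin.optimized.hashtable=true;                    -- checked in nativeConditionsMet
SET hive.vectorized.execution.mapjoin.native.enabled=false;   -- forces the pass-through operator
SET hive.optimize.dynamic.partition.hashjoin=true;            -- enables the reduce-side hash join

EXPLAIN VECTORIZATION DETAIL
SELECT b.key, s.key, s.s_date
FROM fullouter_long_big_1a b
FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
ORDER BY b.key;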
Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL 
-2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL 
-614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 
434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: 
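The 1b variant swaps the join key from bigint to smallint and the small-side payload to timestamp. As the plan below shows, the smallint key is still routed through VectorReduceSinkLongOperator and the same long-key hash join path — integral keys are widened to long for vectorized hashing, so one set of "long" operators covers every integer width. The variant's query, as run below:

-- smallint keys take the same "long" vectorized path as bigint keys
SELECT b.key, s.key, s.s_timestamp
FROM fullouter_long_big_1b b
FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key
ORDER BY b.key;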
QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint + bigTableValueExpressions: col 0:smallint, col 1:timestamp + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 
2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: 
analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:decimal(38,18) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 89 Data size: 1873 Basic stats: 
COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 
-899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN 
fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 0 Map 1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor 
Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' 
+PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### 
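The _nonull table variants above exist to probe FULL OUTER JOIN NULL-key semantics: an equality join key never matches on NULL, so a NULL-keyed row from either side can only surface padded with NULLs from the other side. A minimal sketch of what the 1a runs below exercise, using the tables just defined:

SELECT b.key0, b.key1, s.key0, s.key1
FROM fullouter_multikey_big_1a b
FULL OUTER JOIN fullouter_multikey_small_1a s
  ON b.key0 = s.key0 AND b.key1 = s.key1
ORDER BY b.key0, b.key1;
-- The big side's NULL-keyed row (3556, NULL) cannot match and comes back as
-- 3556 NULL NULL NULL; it drops out entirely once big_1a_nonull is
-- substituted for big_1a, as the four result sets below demonstrate.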
+PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: 
[DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + bigTableValueExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: 
Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 
2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL 
NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 
-1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 
+NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### 
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 
Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string, VALUE._col0:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:timestamp, col 1:smallint, col 2:string + bigTableValueExpressions: col 0:timestamp, col 1:smallint, col 2:string, col 3:decimal(38,18) + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL 
NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:25.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:36.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:29.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 
2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 
22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE 
TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: 
default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a 
+POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: 
default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:date, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [string] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string, col 1:date, col 2:timestamp + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select 
Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ 
NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD 
MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:25.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:38.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 
09:28:22.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 5c0d6bb..6eaf7ad 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -72,10 +72,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -209,10 +209,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -372,10 +372,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -632,11 +632,11 @@ STAGE PLANS: Map-reduce partition columns: 
rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -692,11 +692,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:string, 1:string + valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -986,11 +986,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Select Operator @@ -1021,11 +1021,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -1081,11 +1081,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:string, 
1:string + valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -1184,11 +1184,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:string, 1:string + valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 6 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index 1ffa0fd..f018a61 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -84,10 +84,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -243,10 +242,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -402,10 +400,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -555,10 +552,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator 
- keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -708,10 +704,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -868,10 +863,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out index dce2930..ff300a0 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -87,11 +87,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -147,11 +147,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:int, 
1:int + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -289,11 +289,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -349,11 +349,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:int, 1:int + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -501,11 +501,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -558,11 +557,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -620,11 +618,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
partitionColumnNums: [2] - valueColumnNums: [1] + partitionColumns: 2:double + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 4 @@ -663,10 +661,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 5 @@ -808,11 +806,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -865,11 +862,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -927,11 +923,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1] + partitionColumns: 2:double + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 4 @@ -970,10 +966,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: 
COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 5 @@ -1111,11 +1107,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1168,11 +1163,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) @@ -1180,11 +1174,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -1228,10 +1221,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: llap @@ -1251,6 +1243,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -1293,10 +1288,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1439,11 +1433,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1496,11 +1489,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) @@ -1508,11 +1500,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -1556,10 +1547,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: llap @@ -1579,6 +1569,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -1621,10 +1614,9 @@ STAGE PLANS: Map-reduce partition columns: 
_col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1760,10 +1752,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -1926,10 +1918,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2004,10 +1995,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 @@ -2144,10 +2135,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) @@ -2155,10 +2145,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint 
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2219,10 +2208,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -2242,6 +2230,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -2284,10 +2275,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index 02f4683..c090051 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -101,11 +101,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), 1L (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 4] + keyColumns: 0:int, 1:int, 4:bigint keyExpressions: ConstantVectorExpression(val 1) -> 4:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -270,10 +270,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out index 1229c6d..eb5480d 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -102,10 +102,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -264,10 +264,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -426,10 +426,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -588,10 +588,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value 
expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -744,10 +744,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 9936 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -900,10 +899,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1041,10 +1039,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index daaf17f..195ea0c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -88,10 +88,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap @@ -147,10 +147,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -269,10 +269,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap @@ -328,10 +328,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -503,10 +503,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:double Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: double) Reducer 3 @@ -673,10 +673,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap @@ -732,10 +732,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out index dce648a..b15a993 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out @@ -368,10 +368,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:double, 4:bigint, 5:bigint Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out index d00306b..2350830 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out @@ -95,10 +95,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:decimal(20,2), 4:bigint, 5:bigint Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(20,2)), _col4 (type: bigint), _col5 (type: bigint) Execution mode: vectorized, llap @@ -234,10 +234,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:decimal(20,2), 4:bigint, 5:bigint Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(20,2)), _col4 (type: bigint), _col5 (type: bigint) Execution mode: vectorized, llap @@ -399,10 +399,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3, 4] + valueColumns: 2:decimal(20,2), 3:bigint, 4:bigint Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(20,2)), _col3 (type: bigint), _col4 (type: bigint) Execution mode: vectorized, llap @@ -458,10 +458,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:decimal(20,2), 4:bigint, 5:bigint Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(20,2)), _col4 (type: bigint), _col5 (type: bigint) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out index 3acc4ec..31ccb5e 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -97,10 +97,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Output Operator @@ -109,10 +109,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -184,10 +184,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: bigint) Reducer 3 @@ -208,6 +208,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -260,10 +263,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: bigint) @@ -342,10 +345,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Output Operator @@ -354,10 +357,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -429,10 +432,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 
 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
         value expressions: _col1 (type: string), _col2 (type: bigint)
     Reducer 3 
@@ -453,6 +456,9 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
     Reducer 4 
         Execution mode: vectorized, llap
         Reduce Vectorization:
@@ -505,10 +511,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkStringOperator
-                keyColumnNums: [0]
+                keyColumns: 0:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [1, 2]
+                valueColumns: 1:string, 2:bigint
             Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: string), _col2 (type: bigint)
@@ -618,10 +624,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:string, 1:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [2]
+                valueColumns: 2:bigint
             Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col2 (type: bigint)
         Execution mode: vectorized, llap
@@ -677,10 +683,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [3]
+                valueColumns: 3:bigint
             Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
         Reduce Output Operator
@@ -689,10 +695,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [3]
+                valueColumns: 3:bigint
             Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
     Reducer 3 
@@ -747,10 +753,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkStringOperator
-                keyColumnNums: [0]
+                keyColumns: 0:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [1, 2]
+                valueColumns: 1:string, 2:bigint
             Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: string), _col2 (type: bigint)
     Reducer 4 
@@ -771,6 +777,9 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
     Reducer 5 
         Execution mode: vectorized, llap
         Reduce Vectorization:
@@ -823,10 +832,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkStringOperator
-                keyColumnNums: [0]
+                keyColumns: 0:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [1, 2]
+                valueColumns: 1:string, 2:bigint
             Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: string), _col2 (type: bigint)
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
index bbfba28..07c4eed 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
@@ -88,10 +88,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:string, 1:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -158,10 +157,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [3]
+                valueColumns: 3:bigint
             Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
     Reducer 3 
@@ -280,10 +279,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:string, 1:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -350,10 +348,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [3]
+                valueColumns: 3:bigint
             Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
     Reducer 3 
@@ -499,10 +497,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:string, 1:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -569,10 +566,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:string, 1:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [2]
+                valueColumns: 2:bigint
             Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col2 (type: bigint)
     Reducer 3 
@@ -611,10 +608,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [3]
+                valueColumns: 3:bigint
             Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
     Reducer 4 
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out
index e26b6c5..4563bd6 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out
@@ -88,10 +88,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -231,10 +230,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
index d75f2d8..2d41c9e 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
@@ -88,10 +88,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -249,10 +248,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -417,10 +415,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -586,10 +583,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -657,10 +653,10 @@ STAGE PLANS:
             sort order: -+
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [5, 4]
+                keyColumns: 5:bigint, 4:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0, 1]
+                valueColumns: 0:int, 1:int
             Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col0 (type: int), _col1 (type: int)
     Reducer 3 
@@ -792,10 +788,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -953,10 +948,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -1128,10 +1122,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -1290,10 +1283,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -1354,11 +1346,11 @@ STAGE PLANS:
             sort order: -+
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [5, 4]
+                keyColumns: 5:bigint, 4:int
                 keyExpressions: IfExprColumnNull(col 3:boolean, col 0:int, null)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 3:boolean, col 0:int) -> 4:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0, 1]
+                valueColumns: 0:int, 1:int
             Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col0 (type: int), _col1 (type: int)
     Reducer 3 
@@ -1490,10 +1482,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:int, 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -1646,10 +1637,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:int, 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -1804,10 +1794,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:int, 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -1953,10 +1942,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -2119,10 +2107,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -2285,10 +2272,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -2446,10 +2432,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:int, 1:int, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
index 1f49804..6fb2578 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
@@ -97,10 +97,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [3]
+                valueColumns: 3:bigint
             Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col3 (type: bigint)
@@ -165,10 +165,10 @@ STAGE PLANS:
             sort order: ++
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:string, 1:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [2]
+                valueColumns: 2:bigint
             Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col2 (type: bigint)
@@ -307,10 +307,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [3]
+                valueColumns: 3:bigint
             Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col3 (type: bigint)
@@ -375,10 +375,10 @@ STAGE PLANS:
             sort order: ++
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:string, 1:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [2]
+                valueColumns: 2:bigint
             Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col2 (type: bigint)
@@ -517,10 +517,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [3]
+                valueColumns: 3:bigint
             Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col3 (type: bigint)
@@ -585,10 +585,10 @@ STAGE PLANS:
             sort order: ++
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:string, 1:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [2]
+                valueColumns: 2:bigint
             Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col2 (type: bigint)
@@ -725,10 +725,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2, 3]
+                keyColumns: 0:string, 1:string, 2:string, 3:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 18 Data size: 9936 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
         Execution mode: vectorized, llap
@@ -790,10 +789,9 @@ STAGE PLANS:
             sort order: +
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0]
+                keyColumns: 0:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 9 Data size: 4968 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
     Reducer 3 
@@ -929,10 +927,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkStringOperator
-                keyColumnNums: [0]
+                keyColumns: 0:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
         Execution mode: vectorized, llap
@@ -985,10 +982,9 @@ STAGE PLANS:
             sort order: +
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0]
+                keyColumns: 0:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
     Reducer 3 
@@ -1122,10 +1118,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: double)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0]
+                keyColumns: 0:double
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [1]
+                valueColumns: 1:bigint
             Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col1 (type: bigint)
@@ -1181,10 +1177,10 @@ STAGE PLANS:
             sort order: +
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0]
+                keyColumns: 0:double
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [1]
+                valueColumns: 1:bigint
             Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
             TopN Hash Memory Usage: 0.1
             value expressions: _col1 (type: bigint)
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
index 5d81631..7f94d6f 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
@@ -86,10 +86,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1]
+                keyColumns: 0:int, 1:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [2, 3]
+                valueColumns: 2:int, 3:int
             Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col2 (type: int), _col3 (type: int)
         Execution mode: vectorized, llap
@@ -153,11 +153,11 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 2]
+                keyColumns: 0:int, 2:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                partitionColumnNums: [0]
-                valueColumnNums: [1]
+                partitionColumns: 0:int
+                valueColumns: 1:int
             Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col2 (type: int)
     Reducer 3 
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index c7f38bd..26795d2 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -57,6 +57,7 @@ STAGE PLANS:
                 className: VectorMapJoinInnerMultiKeyOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col0, _col1, _col2, _col3
             input vertices:
               1 Reducer 4
@@ -71,6 +72,7 @@ STAGE PLANS:
                 className: VectorMapJoinOuterStringOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col0, _col1, _col2, _col3, _col5
             input vertices:
              1 Reducer 5
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
index d1f8ac5..abf352d 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
@@ -84,10 +84,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [3]
+                valueColumns: 3:bigint
             Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
         Execution mode: vectorized, llap
@@ -339,11 +339,11 @@ STAGE PLANS:
             Map-reduce partition columns: rand() (type: double)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                partitionColumnNums: [4]
-                valueColumnNums: [3]
+                partitionColumns: 4:double
+                valueColumns: 3:bigint
             Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
         Execution mode: vectorized, llap
@@ -399,11 +399,11 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                partitionColumnNums: [0, 1]
-                valueColumnNums: [3]
+                partitionColumns: 0:string, 1:string
+                valueColumns: 3:bigint
             Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
     Reducer 3 
@@ -685,11 +685,11 @@ STAGE PLANS:
             Map-reduce partition columns: rand() (type: double)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                partitionColumnNums: [4]
-                valueColumnNums: [3]
+                partitionColumns: 4:double
+                valueColumns: 3:bigint
             Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
         Select Operator
@@ -720,11 +720,11 @@ STAGE PLANS:
             Map-reduce partition columns: rand() (type: double)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                partitionColumnNums: [4]
-                valueColumnNums: [3]
+                partitionColumns: 4:double
+                valueColumns: 3:bigint
             Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
         Execution mode: vectorized, llap
@@ -780,11 +780,11 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                partitionColumnNums: [0, 1]
-                valueColumnNums: [3]
+                partitionColumns: 0:string, 1:string
+                valueColumns: 3:bigint
             Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
     Reducer 3 
@@ -873,11 +873,11 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkObjectHashOperator
-                keyColumnNums: [0, 1, 2]
+                keyColumns: 0:string, 1:string, 2:bigint
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                partitionColumnNums: [0, 1]
-                valueColumnNums: [3]
+                partitionColumns: 0:string, 1:string
+                valueColumns: 3:bigint
             Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col3 (type: bigint)
     Reducer 5 
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
index 6182c90..edc1fae 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
@@ -86,10 +86,9 @@ STAGE PLANS:
             sort order: 
             Reduce Sink Vectorization:
                 className: VectorReduceSinkEmptyKeyOperator
-                keyColumnNums: []
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:bigint
             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: bigint)
         Execution mode: vectorized, llap
@@ -509,10 +508,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: double)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkMultiKeyOperator
-                keyColumnNums: [0]
+                keyColumns: 0:double
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
         Execution mode: vectorized, llap
         LLAP IO: no inputs
@@ -576,10 +574,9 @@ STAGE PLANS:
             sort order: 
             Reduce Sink Vectorization:
                 className: VectorReduceSinkEmptyKeyOperator
-                keyColumnNums: []
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:bigint
             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: bigint)
     Reducer 3 
@@ -693,10 +690,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: boolean)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [0]
+                keyColumns: 0:boolean
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
         Execution mode: vectorized, llap
         LLAP IO: no inputs
@@ -766,10 +762,9 @@ STAGE PLANS:
             sort order: 
             Reduce Sink Vectorization:
                 className: VectorReduceSinkEmptyKeyOperator
-                keyColumnNums: []
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:bigint
             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: bigint)
     Reducer 3 
@@ -873,10 +868,9 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: string)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkStringOperator
-                keyColumnNums: [0]
+                keyColumns: 0:string
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
         Execution mode: vectorized, llap
         LLAP IO: no inputs
@@ -928,10 +922,9 @@ STAGE PLANS:
             sort order: 
             Reduce Sink Vectorization:
                 className: VectorReduceSinkEmptyKeyOperator
-                keyColumnNums: []
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:string
             Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: string)
     Reducer 3 
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
index 5e946c4..d3ba688 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
@@ -95,10 +95,9 @@ STAGE PLANS:
             sort order: 
             Reduce Sink Vectorization:
                 className: VectorReduceSinkEmptyKeyOperator
-                keyColumnNums: []
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:bigint
             Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col0 (type: bigint)
         Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out
index 041990a..c5e1dae 100644
--- ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out
+++ ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out
@@ -269,6 +269,9 @@ STAGE PLANS:
                 sort order: 
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: bigint)
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
     Reducer 3 
         Execution mode: vectorized, llap
        Reduce Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
index 82a2980..3537c40 100644
--- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
+++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
@@ -85,12 +85,14 @@ STAGE PLANS:
               0 _col0 (type: int)
               1 _col0 (type: int)
             Map Join Vectorization:
-                bigTableKeyColumnNums: [0]
-                bigTableRetainedColumnNums: [0]
+                bigTableKeyColumns: 0:int
+                bigTableRetainColumnNums: []
                 className: VectorMapJoinInnerBigOnlyLongOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                projectedOutputColumnNums: [0]
+                nonOuterSmallTableKeyMapping: [0]
+                projectedOutput: 0:int
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col1
             input vertices:
              1 Map 2
@@ -160,10 +162,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [0]
+                keyColumns: 0:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -253,13 +254,15 @@ STAGE PLANS:
               0 _col0 (type: int)
               1 _col0 (type: int)
             Map Join Vectorization:
-                bigTableKeyColumnNums: [0]
-                bigTableRetainedColumnNums: [0]
-                bigTableValueColumnNums: [0]
+                bigTableKeyColumns: 0:int
+                bigTableRetainColumnNums: [0]
+                bigTableValueColumns: 0:int
                 className: VectorMapJoinLeftSemiLongOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                projectedOutputColumnNums: [0]
+                nonOuterSmallTableKeyMapping: []
+                projectedOutput: 0:int
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col0
             input vertices:
              1 Map 2
@@ -333,10 +336,9 @@ STAGE PLANS:
             Map-reduce partition columns: _col0 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [0]
+                keyColumns: 0:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: []
             Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -462,13 +464,15 @@ STAGE PLANS:
               0 _col0 (type: int)
               1 _col1 (type: int)
             Map Join Vectorization:
-                bigTableKeyColumnNums: [0]
-                bigTableRetainedColumnNums: [0]
+                bigTableKeyColumns: 0:int
+                bigTableRetainColumnNums: []
                 className: VectorMapJoinInnerLongOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                projectedOutputColumnNums: [3, 0]
-                smallTableMapping: [3]
+                nonOuterSmallTableKeyMapping: [0]
+                projectedOutput: 3:string, 0:int
+                smallTableValueMapping: 3:string
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col1, _col2
             input vertices:
              1 Map 2
@@ -538,10 +542,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [1]
+                keyColumns: 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:string
             Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: string)
         Execution mode: vectorized, llap
@@ -631,10 +635,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [1]
+                keyColumns: 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:string
             Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: string)
         Execution mode: vectorized, llap
@@ -685,14 +689,16 @@ STAGE PLANS:
               0 _col1 (type: int)
               1 _col0 (type: int)
             Map Join Vectorization:
-                bigTableKeyColumnNums: [0]
-                bigTableRetainedColumnNums: [0, 1]
-                bigTableValueColumnNums: [0, 1]
+                bigTableKeyColumns: 0:int
+                bigTableRetainColumnNums: [0, 1]
+                bigTableValueColumns: 0:int, 1:string
                 className: VectorMapJoinInnerLongOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                projectedOutputColumnNums: [3, 0, 0, 1]
-                smallTableMapping: [3]
+                nonOuterSmallTableKeyMapping: []
+                projectedOutput: 3:string, 0:int, 0:int, 1:string
+                smallTableValueMapping: 3:string
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col0, _col1, _col2, _col3
             input vertices:
              0 Map 1
@@ -795,14 +801,16 @@ STAGE PLANS:
               0 _col0 (type: int)
               1 _col1 (type: int)
             Map Join Vectorization:
-                bigTableKeyColumnNums: [0]
-                bigTableRetainedColumnNums: [0, 1]
-                bigTableValueColumnNums: [0, 1]
+                bigTableKeyColumns: 0:int
+                bigTableRetainColumnNums: [0, 1]
+                bigTableValueColumns: 0:int, 1:string
                 className: VectorMapJoinInnerLongOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                projectedOutputColumnNums: [0, 1, 3, 0]
-                smallTableMapping: [3]
+                nonOuterSmallTableKeyMapping: []
+                projectedOutput: 0:int, 1:string, 3:string, 0:int
+                smallTableValueMapping: 3:string
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col0, _col1, _col2, _col3
             input vertices:
              1 Map 2
@@ -873,10 +881,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [1]
+                keyColumns: 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:string
             Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: string)
         Execution mode: vectorized, llap
@@ -967,14 +975,16 @@ STAGE PLANS:
               0 _col0 (type: int)
               1 _col1 (type: int)
             Map Join Vectorization:
-                bigTableKeyColumnNums: [0]
-                bigTableRetainedColumnNums: [0, 1]
-                bigTableValueColumnNums: [0, 1]
+                bigTableKeyColumns: 0:int
+                bigTableRetainColumnNums: [0, 1]
+                bigTableValueColumns: 0:int, 1:string
                 className: VectorMapJoinInnerLongOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                projectedOutputColumnNums: [0, 1, 3]
-                smallTableMapping: [3]
+                nonOuterSmallTableKeyMapping: []
+                projectedOutput: 0:int, 1:string, 3:string
+                smallTableValueMapping: 3:string
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col0, _col1, _col2
             input vertices:
              1 Map 2
@@ -1044,10 +1054,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [1]
+                keyColumns: 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:string
             Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: string)
         Execution mode: vectorized, llap
@@ -1138,14 +1148,16 @@ STAGE PLANS:
              0 _col0 (type: int)
              1 _col1 (type: int)
             Map Join Vectorization:
-                bigTableKeyColumnNums: [0]
-                bigTableRetainedColumnNums: [0, 1]
-                bigTableValueColumnNums: [1]
+                bigTableKeyColumns: 0:int
+                bigTableRetainColumnNums: [1]
+                bigTableValueColumns: 1:string
                 className: VectorMapJoinInnerLongOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                projectedOutputColumnNums: [1, 3, 0]
-                smallTableMapping: [3]
+                nonOuterSmallTableKeyMapping: [0]
+                projectedOutput: 1:string, 3:string, 0:int
+                smallTableValueMapping: 3:string
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col1, _col2, _col3
             input vertices:
              1 Map 2
@@ -1215,10 +1227,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [1]
+                keyColumns: 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:string
             Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: string)
         Execution mode: vectorized, llap
@@ -1308,10 +1320,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [1]
+                keyColumns: 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:string
             Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: string)
         Execution mode: vectorized, llap
@@ -1362,14 +1374,16 @@ STAGE PLANS:
              0 _col1 (type: int)
              1 _col0 (type: int)
             Map Join Vectorization:
-                bigTableKeyColumnNums: [0]
-                bigTableRetainedColumnNums: [0, 1]
-                bigTableValueColumnNums: [0, 1]
+                bigTableKeyColumns: 0:int
+                bigTableRetainColumnNums: [0, 1]
+                bigTableValueColumns: 0:int, 1:string
                 className: VectorMapJoinInnerLongOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                projectedOutputColumnNums: [3, 0, 1]
-                smallTableMapping: [3]
+                nonOuterSmallTableKeyMapping: []
+                projectedOutput: 3:string, 0:int, 1:string
+                smallTableValueMapping: 3:string
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col0, _col2, _col3
             input vertices:
              0 Map 1
@@ -1479,10 +1493,10 @@ STAGE PLANS:
             Map-reduce partition columns: _col1 (type: int)
             Reduce Sink Vectorization:
                 className: VectorReduceSinkLongOperator
-                keyColumnNums: [1]
+                keyColumns: 1:int
                 native: true
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                valueColumnNums: [0]
+                valueColumns: 0:string
             Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
             value expressions: _col0 (type: string)
         Execution mode: vectorized, llap
@@ -1533,14 +1547,16 @@ STAGE PLANS:
              0 _col1 (type: int)
              1 _col0 (type: int)
             Map Join Vectorization:
-                bigTableKeyColumnNums: [0]
-                bigTableRetainedColumnNums: [0, 1]
-                bigTableValueColumnNums: [1]
+                bigTableKeyColumns: 0:int
+                bigTableRetainColumnNums: [1]
+                bigTableValueColumns: 1:string
                 className: VectorMapJoinInnerLongOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                projectedOutputColumnNums: [3, 0, 1]
-                smallTableMapping: [3]
+                nonOuterSmallTableKeyMapping: [0]
+                projectedOutput: 3:string, 0:int, 1:string
+                smallTableValueMapping: 3:string
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col0, _col1, _col3
             input vertices:
              0 Map 1
diff --git ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out
index d4151e8..da4c84b 100644
--- ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out
@@ -230,6 +230,7 @@ STAGE PLANS:
                 className: VectorMapJoinInnerBigOnlyMultiKeyOperator
                 native: true
                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                hashTableImplementationType: OPTIMIZED
             outputColumnNames: _col0, _col1, _col2
             input vertices:
              1 Map 2
diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out
index 4b2f06f..9238bc7 100644
--- ql/src/test/results/clientpositive/llap/vector_join30.q.out
+++ ql/src/test/results/clientpositive/llap/vector_join30.q.out
@@ -10,7 +10,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orcsrc_n0
 POSTHOOK: Lineage: orcsrc_n0.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: orcsrc_n0.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -18,7 +18,7 @@ JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -51,6 +51,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -72,6 +73,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
+                            keyColumns: 0:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
@@ -86,6 +88,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -94,6 +102,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -116,9 +125,15 @@ STAGE PLANS:
                         0 _col0 (type: string)
                        1 _col0 (type: string)
                       Map Join Vectorization:
+                          bigTableKeyColumns: 0:string
+                          bigTableRetainColumnNums: [0, 1]
+                          bigTableValueColumns: 0:string, 1:string
                           className: VectorMapJoinInnerBigOnlyStringOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          nonOuterSmallTableKeyMapping: []
+                          projectedOutput: 0:string, 1:string
+                          hashTableImplementationType: OPTIMIZED
                       outputColumnNames: _col2, _col3
                       input vertices:
                        0 Map 1
@@ -141,6 +156,7 @@ STAGE PLANS:
                             className: VectorReduceSinkEmptyKeyOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 0:bigint
                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
         Execution mode: vectorized, llap
@@ -154,14 +170,27 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: true
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
        Reducer 3 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: 
+               reduceColumnSortOrder: 
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: VALUE._col0:bigint
+                   partitionColumnCount: 0
+                   scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
@@ -192,26 +221,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
-PREHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-select sum(hash(Y.key,Y.value))
-PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-select sum(hash(Y.key,Y.value))
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-103231310608
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 LEFT OUTER JOIN
@@ -219,7 +229,7 @@ LEFT OUTER JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 LEFT OUTER JOIN
@@ -251,6 +261,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -266,9 +277,15 @@ STAGE PLANS:
                         0 _col0 (type: string)
                        1 _col0 (type: string)
                       Map Join Vectorization:
+                          bigTableKeyColumns: 0:string
+                          bigTableRetainColumnNums: []
                           className: VectorMapJoinOuterStringOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+                          outerSmallTableKeyMapping: 0 -> 3
+                          projectedOutput: 3:string, 4:string
+                          smallTableValueMapping: 4:string
+                          hashTableImplementationType: OPTIMIZED
                       outputColumnNames: _col2, _col3
                       input vertices:
                        1 Map 3
@@ -291,6 +308,7 @@ STAGE PLANS:
                             className: VectorReduceSinkEmptyKeyOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 0:bigint
                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
         Execution mode: vectorized, llap
@@ -304,6 +322,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: true
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string, string, bigint]
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -311,6 +335,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -325,8 +350,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 1:string
                       Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
         Execution mode: vectorized, llap
@@ -340,14 +367,27 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
        Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: 
+               reduceColumnSortOrder: 
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: VALUE._col0:bigint
+                   partitionColumnCount: 0
+                   scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
@@ -378,26 +418,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
-PREHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-select sum(hash(Y.key,Y.value))
-PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-select sum(hash(Y.key,Y.value))
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-103231310608
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 RIGHT OUTER JOIN
@@ -405,7 +426,7 @@ RIGHT OUTER JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 RIGHT OUTER JOIN
@@ -437,6 +458,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -451,6 +473,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
@@ -465,6 +488,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -472,6 +501,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -487,9 +517,14 @@ STAGE PLANS:
                         0 _col0 (type: string)
                        1 _col0 (type: string)
                       Map Join Vectorization:
+                          bigTableKeyColumns: 0:string
+                          bigTableRetainColumnNums: [0, 1]
+                          bigTableValueColumns: 0:string, 1:string
                           className: VectorMapJoinOuterStringOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true,
Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -512,6 +547,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -525,14 +561,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -563,26 +612,196 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM +PREHOOK: query: explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM +POSTHOOK: query: explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96342 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -593,7 +812,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -629,6 +848,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -650,6 +870,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -664,6 +885,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -672,6 +899,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -725,6 +953,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -738,6 +967,12 @@ STAGE PLANS: 
allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -746,6 +981,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -767,6 +1003,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -781,14 +1018,27 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -819,32 +1069,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -855,7 +1080,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -890,6 +1115,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -904,6 +1130,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string 
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -918,6 +1145,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -925,6 +1158,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -939,8 +1173,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -954,6 +1190,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -961,6 +1203,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -975,6 +1218,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -989,6 +1233,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1011,14 +1261,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + 
reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1049,32 +1309,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -1085,7 +1320,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -1120,6 +1355,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1134,6 +1370,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1148,6 +1385,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1155,6 +1398,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1169,8 +1413,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column 
stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1184,6 +1430,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1191,6 +1443,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1205,6 +1458,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1219,6 +1473,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1241,14 +1501,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1279,32 +1549,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -1315,7 +1560,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM 
orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -1350,6 +1595,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1364,6 +1610,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1378,6 +1625,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1385,6 +1638,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1399,8 +1653,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1414,6 +1670,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1421,6 +1683,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1435,6 +1698,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1449,6 +1713,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce 
Operator Tree: @@ -1471,14 +1741,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1509,32 +1789,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x RIGHT OUTER JOIN @@ -1545,7 +1800,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x RIGHT OUTER JOIN @@ -1580,6 +1835,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1594,6 +1850,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1608,6 +1865,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1615,6 +1878,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, 
_col1 @@ -1629,8 +1893,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1644,6 +1910,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1651,6 +1923,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1665,6 +1938,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1679,6 +1953,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1701,14 +1981,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1739,28 +2029,1443 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM +PREHOOK: query: explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM +POSTHOOK: query: explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +JOIN +(SELECT orcsrc_n0.* FROM 
orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: 
vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Full Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + 
aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + 
partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge 
Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: 
string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Left Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +
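For reference, each expected value in these golden outputs is a single sum(hash(...)) checksum rather than a row listing, so the result is insensitive to row order and to which join implementation produced it. In the Merge Join Operator above, the condition map is read pairwise against input 0 ("Full Outer Join 0 to 1" joins x to Y, "Left Outer Join 0 to 2" joins x to Z), and "Vectorizing MergeJoin Supported IS false" records that the reduce-side merge join itself still runs unvectorized even though the surrounding map and reduce work is vectorized. A minimal HiveQL sketch of the test pattern, assuming a session where the orcsrc_n0 table already exists; the set statements are illustrative session configuration, not part of this patch:

  -- Assumed session setup for a Tez/LLAP run with vectorization enabled.
  set hive.execution.engine=tez;
  set hive.vectorized.execution.enabled=true;

  -- Order-insensitive checksum of a three-way outer join: each projected
  -- row is hashed to an int, and the ints are summed into one bigint.
  EXPLAIN VECTORIZATION DETAIL
  SELECT sum(hash(Y.key, Y.value))
  FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 SORT BY key) x
  FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 SORT BY value) Y
    ON (x.key = Y.key)
  LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 SORT BY value) Z
    ON (x.key = Z.key);

The next case swaps the LEFT OUTER and FULL OUTER legs, which only flips the corresponding rows of the condition map.

+PREHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +LEFT OUTER JOIN +(SELECT 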
orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + Full Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true 
+ reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: 
[DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Right Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x RIGHT OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + 
Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -348019368476 + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map 
Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + Full Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: 
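+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +

That closes out the merge-join golden file; the rest of the patch updates vector_join_filters.q.out, whose RIGHT OUTER JOIN tests carry non-key conditions in the ON clause. In the plans below, predicates on the non-preserved side a are pushed down into a Filter Operator under its TableScan, while predicates on the preserved side b stay attached to the join as residual "filter predicates" on input 1, because rows of b that fail them must still be emitted with NULL-extended a columns. A short HiveQL illustration of that semantic split, assuming the myinput1_n1(key int, value int) table these tests use:

  -- ON-clause conditions on the preserved side: every row of b survives;
  -- where a condition fails, the a-side columns come back as NULL.
  SELECT sum(hash(a.key, a.value, b.key, b.value))
  FROM myinput1_n1 a
  RIGHT OUTER JOIN myinput1_n1 b
    ON a.key = b.key AND b.key > 40 AND b.value > 50;

  -- The same conditions in WHERE run after the join and also discard
  -- preserved b rows that fail them, so the checksum generally differs.
  SELECT sum(hash(a.key, a.value, b.key, b.value))
  FROM myinput1_n1 a
  RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.key
  WHERE b.key > 40 AND b.value > 50;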
diff --git ql/src/test/results/clientpositive/llap/vector_join_filters.q.out ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index 7c1780b..a49e8e2 100644 --- ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -47,15 +47,174 @@ POSTHOOK: Input: default@myinput1_n1 #### A masked pattern was here #### 4937935 Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -3080335 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: ((key > 40) and (value > 50) and (key = value)) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 
(type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 + 1 + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 4:int + Statistics: Num rows: 4 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: 
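MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +

The plan above is the degenerate case: the ON clause has no equality linking a to b (a.key = a.value and b.key = b.value each touch a single table), so the map join has no keys, is planned as a cross product (hence the warning), applies every condition as a join-time filter, and falls back to the non-native VectorMapJoinOuterFilteredOperator ("Outer Join has keys IS false"). An illustrative HiveQL sketch of the contrast with the keyed variants tested next, against the same myinput1_n1 table:

  -- Keyless outer join: no cross-table equality, hence no join keys;
  -- planned as a cross product with residual filters (non-native).
  EXPLAIN VECTORIZATION OPERATOR
  SELECT sum(hash(a.key, a.value, b.key, b.value))
  FROM myinput1_n1 a
  RIGHT OUTER JOIN myinput1_n1 b
    ON a.key > 40 AND a.value > 50 AND a.key = a.value
   AND b.key > 40 AND b.value > 50 AND b.key = b.value;

  -- One cross-table equality (a.key = b.value) gives the join a key, and
  -- the same shape vectorizes natively as VectorMapJoinOuterLongOperator.
  EXPLAIN VECTORIZATION OPERATOR
  SELECT sum(hash(a.key, a.value, b.key, b.value))
  FROM myinput1_n1 a
  RIGHT OUTER JOIN myinput1_n1 b
    ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value
   AND b.key > 40 AND b.value > 50 AND b.key = b.value;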
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n1 @@ -128,42 +287,681 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n1 #### A masked pattern was here #### 4937935 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -3080335 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: ((key > 40) and (value > 50) and (key = value)) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select 
Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -3080335 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: ((key > 40) and (value > 50) and (key = value)) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), 
value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: 
all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -3080335 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: ((key > 40) and (value > 50) and (key = value)) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n1 -#### A masked pattern was here #### -3080335 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: ((key > 40) and (value > 50) and (key = value)) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, 
_col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: 
all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n1 a LEFT OUTER JOIN myinput1_n1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n1 diff --git ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out index 12db036..b8d76ed 100644 --- ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out @@ -47,15 +47,167 @@ POSTHOOK: Input: default@myinput1_n4 #### A masked pattern was here #### 13630578 Warning: Map Join MAPJOIN[14][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -13630578 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: 
+ 0 Map 1 + Statistics: Num rows: 9 Data size: 153 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 4:int + Statistics: Num rows: 9 Data size: 153 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a JOIN myinput1_n4 b ON a.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n4 @@ -128,42 +280,643 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n4 #### A masked pattern was here #### 4542003 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.value +POSTHOOK: 
query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -3079923 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.key +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: 
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.key PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.key +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.key POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -4509891 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.value = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 
(CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.value = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.value = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.value = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -3113558 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key=b.key and a.value = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key=b.key and a.value = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key=b.key and a.value = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key=b.key and a.value = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n4 -#### A masked pattern was here #### -3079923 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic 
stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n4 a LEFT OUTER JOIN myinput1_n4 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1_n4 c ON (b.value=c.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n4 diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out index 55be910..2438603 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out @@ -65,6 +65,7 @@ POSTHOOK: Output: default@tjoin1 POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE PREHOOK: type: QUERY PREHOOK: Input: default@tjoin2stage @@ -76,12 +77,14 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain vectorization expression +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -167,15 +170,21 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -257,15 +266,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, 
tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -289,6 +300,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -346,6 +358,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -353,6 +371,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -367,8 +386,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -382,6 +403,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -399,15 +426,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -431,6 +460,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 
(type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -488,6 +518,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -495,6 +531,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -509,8 +546,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -524,6 +563,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -541,15 +586,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -573,6 +620,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -591,9 +639,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 2:int, val 15) + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int, 2:int, 4:char(2) + 
smallTableValueMapping: 4:char(2) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -627,6 +682,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -634,6 +695,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -648,8 +710,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -663,6 +727,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -680,15 +750,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -712,6 +784,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -730,9 +803,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 2:int, val 15) + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports 
Key Types IS true + projectedOutput: 0:int, 1:int, 2:int, 4:char(2) + smallTableValueMapping: 4:char(2) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -766,6 +846,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -773,6 +859,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -787,8 +874,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -802,6 +891,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -819,6 +914,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 9c51b32..960f5f5 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -128,18 +128,100 @@ POSTHOOK: query: select * from t4_n19 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4_n19 #### A masked pattern was here #### -PREHOOK: query: explain vectorization only summary - +PREHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary - +POSTHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data 
size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 @@ -156,16 +238,100 @@ POSTHOOK: Input: default@t2_n87 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + 
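The semi-join plans in this file all share one shape: the LEFT SEMI JOIN is rewritten as a MapJoin whose broadcast (build) side is first deduplicated by a hash-mode Group By Operator, so each probe-side row is emitted at most once regardless of how many build-side matches it has. A minimal sketch of the pattern, reusing the test table names (the DDL here is an assumption for illustration, not part of the patch):

create table t1_n148 (key int, value string);
create table t2_n87 (key int, value string);

-- Illustrative only. Each matching row of a appears once in the output,
-- however many b rows share its key, and only columns of a are selectable
-- under LEFT SEMI JOIN; the plan builds the hash table from distinct b.key.
select * from t1_n148 a left semi join t2_n87 b on a.key = b.key
sort by a.key, a.value;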
Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 @@ -184,16 +350,100 @@ POSTHOOK: Input: default@t2_n87 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: 
_col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 @@ -204,18 +454,106 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t4_n19 #### A masked pattern was here #### -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value -PREHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE 
Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: (key < 15) (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 15) (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### @@ -235,16 +573,100 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter 
Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: ((value < 'val_10') and key is not null) (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value < 'val_10') and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 @@ -258,224 +680,1433 @@ POSTHOOK: Input: default@t2_n87 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n148 -PREHOOK: Input: default@t3_n35 -#### A masked pattern was here #### -POSTHOOK: query: select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on 
a.key = b.key sort by a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n148 -POSTHOOK: Input: default@t3_n35 -#### A masked pattern was here #### -val_10 -val_8 -val_9 -PREHOOK: query: explain vectorization only summary -select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - -PREHOOK: query: select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n148 -PREHOOK: Input: default@t2_n87 -#### A masked pattern was here #### -POSTHOOK: query: select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n148 -POSTHOOK: Input: default@t2_n87 -#### A masked pattern was here #### -PREHOOK: query: explain vectorization only summary -select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - -PREHOOK: query: select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n148 -PREHOOK: Input: default@t2_n87 -#### A masked pattern was here #### -POSTHOOK: query: select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n148 -POSTHOOK: Input: default@t2_n87 -#### A masked pattern was here #### -10 val_5 -10 val_5 -10 val_5 -4 val_2 -8 val_4 -PREHOOK: query: explain vectorization only summary -select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n148 -PREHOOK: Input: default@t3_n35 +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n148 -POSTHOOK: Input: default@t3_n35 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 
(SIMPLE_EDGE) #### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -2 -4 -4 -5 -5 -5 -8 -8 -9 -PREHOOK: query: explain vectorization only summary -select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t3_n35 + filterExpr: (key > 5) (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -PREHOOK: query: select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 -PREHOOK: Input: default@t2_n87 +PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by 
a.key, a.value +POSTHOOK: query: select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 -POSTHOOK: Input: default@t2_n87 +POSTHOOK: Input: default@t3_n35 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -8 val_8 -PREHOOK: query: explain vectorization only summary -select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value +val_10 +val_8 +val_9 +PREHOOK: query: explain vectorization expression +select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization expression +select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t2_n87 + filterExpr: ((key > 5) and (value <= 'val_20')) (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 12 
Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 -PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 -POSTHOOK: Input: default@t3_n35 #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_5 -10 val_10 10 val_5 -10 val_10 10 val_5 -4 val_4 4 val_2 -8 val_8 8 val_4 -PREHOOK: query: explain vectorization only summary -select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: explain vectorization expression +select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization expression +select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1_n148 + filterExpr: (key > 2) (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: 
hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t2_n87 +#### A masked pattern was here #### +10 val_5 +10 val_5 +10 val_5 +4 val_2 +8 val_4 +PREHOOK: query: explain vectorization expression +select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join 
Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +2 +4 +4 +5 +5 +5 +8 +8 +9 +PREHOOK: query: explain vectorization expression +select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 
2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 (2 * _col0) (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: (2 * key) is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (2 * key) is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (2 * _col0) (type: int) + sort order: + + Map-reduce partition columns: (2 * _col0) (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t2_n87 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +8 val_8 +PREHOOK: query: explain vectorization expression +select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null 
(type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 +PREHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t2_n87 +POSTHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +10 val_10 10 val_5 +10 val_10 10 val_5 +10 val_10 10 val_5 +4 val_4 4 val_2 +8 val_8 8 val_4 +PREHOOK: query: explain vectorization expression +select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int), value (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 11 Data size: 2068 Basic stats: 
COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain vectorization expression +select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 
Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 +PREHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t2_n87 +POSTHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization expression +select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: 
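Every explain in this golden file moves from `explain vectorization only summary` to `explain vectorization expression`, which is what introduces the large STAGE DEPENDENCIES / STAGE PLANS additions: the ONLY option suppresses the non-vectorization parts of the plan, so the old output ended at the PLAN VECTORIZATION block, while EXPRESSION keeps the full plan and adds per-expression vectorization detail once vectorization is enabled. A sketch of the two forms against the query above (illustrative, reusing the test tables):

-- Old form: vectorization summary only; stage plans suppressed.
explain vectorization only summary
select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key
left semi join t2_n87 c on b.key = c.key sort by a.key;

-- New form: full stage plans, plus expression-level vectorization detail
-- whenever hive.vectorized.execution.enabled is true.
explain vectorization expression
select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key
left semi join t2_n87 c on b.key = c.key sort by a.key;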
+ Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 +PREHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@t1_n148 +POSTHOOK: Input: default@t2_n87 +POSTHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization expression +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE 
Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t2_n87 POSTHOOK: Input: default@t3_n35 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain vectorization only summary -select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +NULL +NULL +NULL +PREHOOK: query: explain vectorization expression +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization expression +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator 
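A rough semantic reading of the query being explained here, using the usual Hive equivalence of LEFT SEMI JOIN to an IN predicate (table names are the ones from this test; a hedged sketch, not the planner's own rewrite):

  SELECT a.key
  FROM t1_n148 a
  FULL OUTER JOIN t3_n35 b ON a.key = b.key
  WHERE b.key IN (SELECT c.key FROM t2_n87 c)   -- the LEFT SEMI JOIN branch
  SORT BY a.key;

The Filter on _col5 (b.key) is not null plays the role of the IN predicate, while a.key can still come out NULL for b-only rows of the full outer join, which is where the trailing NULL result rows come from.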
+ key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -487,6 +2118,18 @@ POSTHOOK: Input: default@t3_n35 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -495,63 +2138,114 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization only summary -select a.key from 
t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization expression +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization expression +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n148 -PREHOOK: Input: default@t2_n87 -PREHOOK: Input: default@t3_n35 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n148 -POSTHOOK: Input: default@t2_n87 -POSTHOOK: Input: default@t3_n35 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -PREHOOK: query: explain vectorization only summary -select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Left Semi Join 1 to 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY @@ -594,16 +2288,122 @@ POSTHOOK: Input: default@t3_n35 NULL NULL NULL -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap 
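Map 5's hash-mode Group By here is the semi-join build side: it collapses c to its distinct keys before the join, so duplicate rows in t2_n87 cannot multiply output rows. On its own it computes no more than this sketch:

  SELECT key        -- distinct semi-join keys, as Map 5's Group By produces
  FROM t2_n87
  GROUP BY key;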
+ LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 @@ -645,23 +2445,136 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization only summary -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization expression +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization expression +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### 
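The two BROADCAST_EDGEs above feed Map 1 with hash tables for both small inputs, so the semi join and the outer join chain inside a single mapper and only the final sort needs a shuffle. The query under test at this point is:

  SELECT a.key
  FROM t3_n35 a
  LEFT SEMI JOIN t2_n87 b ON a.key = b.key     -- hash table broadcast from Map 3
  LEFT OUTER JOIN t1_n148 c ON a.key = c.key   -- hash table broadcast from Map 4
  SORT BY a.key;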
+ Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select a.key from 
t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -689,25 +2602,149 @@ POSTHOOK: Input: default@t3_n35 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs 
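Note the split in the plan that follows: the semi join runs map-side off a broadcast hash table, while the FULL OUTER JOIN lands in Reducer 2 as a shuffle-based Merge Join, since both inputs must be able to emit their unmatched rows. A session setup matching what these plans report (Tez is an assumption read off the STAGE PLANS) would be roughly:

  SET hive.execution.engine=tez;                 -- assumption: Tez, per the Tez stage above
  SET hive.vectorized.execution.enabled=false;   -- matches enabledConditionsNotMet in each PLAN VECTORIZATION block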
+ Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 @@ -762,16 +2799,122 @@ POSTHOOK: Input: default@t3_n35 NULL NULL NULL -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not 
null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 @@ -819,16 +2962,92 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: ((key > 100) and value is not null) (type: boolean) + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 100) and value is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 12 Data size: 2226 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: b + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY PREHOOK: Input: default@t2_n87 @@ -839,10 +3058,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n87 POSTHOOK: Input: default@t3_n35 #### A masked pattern was here #### -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -881,6 +3100,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -949,10 +3169,10 @@ POSTHOOK: Input: default@t2_n87 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -991,6 +3211,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1061,10 +3282,10 @@ POSTHOOK: Input: default@t2_n87 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain 
vectorization operator select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1103,6 +3324,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1165,10 +3387,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t4_n19 #### A masked pattern was here #### -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1207,6 +3429,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 @@ -1284,10 +3507,10 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1326,6 +3549,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1391,10 +3615,10 @@ POSTHOOK: Input: default@t2_n87 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1458,6 +3682,7 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 @@ -1502,10 +3727,10 @@ POSTHOOK: Input: default@t3_n35 val_10 val_8 val_9 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1569,6 +3794,7 @@ STAGE PLANS: input 
vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 @@ -1610,10 +3836,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 #### A masked pattern was here #### -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1677,6 +3903,7 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1719,10 +3946,10 @@ POSTHOOK: Input: default@t2_n87 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1761,6 +3988,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1842,10 +4070,10 @@ POSTHOOK: Input: default@t3_n35 8 8 9 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1884,6 +4112,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1950,10 +4179,10 @@ POSTHOOK: Input: default@t2_n87 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1985,25 +4214,34 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: llap LLAP IO: all inputs Map 3 @@ -2054,10 +4292,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2095,10 +4333,10 @@ POSTHOOK: Input: default@t3_n35 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2137,6 +4375,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -2213,10 +4452,10 @@ POSTHOOK: Input: default@t3_n35 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join 
t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2248,20 +4487,29 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 - Left Semi Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 3 @@ -2320,10 +4568,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2361,10 +4609,10 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2392,20 +4640,32 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col0, _col5 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + 
Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 3 @@ -2424,33 +4684,197 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 +PREHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t2_n87 +POSTHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: 
Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2462,13 +4886,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: 
default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -2500,10 +4924,13 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization summary +NULL +NULL +NULL +PREHOOK: query: explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2519,7 +4946,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2551,21 +4978,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -2573,28 +5004,40 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column 
stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2647,10 +5090,10 @@ POSTHOOK: Input: default@t3_n35 NULL NULL NULL -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2666,44 +5109,73 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) 
outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -2718,30 +5190,13 @@ STAGE PLANS: Reducer 2 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2794,11 +5249,11 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization summary -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false @@ -2813,7 +5268,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2821,33 +5277,52 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data 
size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 5 @@ -2867,28 +5342,26 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2900,13 +5373,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 
#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -2934,19 +5407,30 @@ POSTHOOK: Input: default@t3_n35 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2962,7 +5446,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2970,33 +5455,52 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort 
order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 5 @@ -3016,28 +5520,26 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3049,64 +5551,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1_n148 -PREHOOK: Input: default@t2_n87 -PREHOOK: Input: default@t3_n35 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1_n148 -POSTHOOK: Input: default@t2_n87 -POSTHOOK: Input: default@t3_n35 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -2 -4 -4 -5 -5 -5 -8 -8 -9 -NULL -NULL -NULL -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -3145,6 +5593,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Map Join Operator condition map: Left Outer Join 0 to 1 @@ -3155,6 +5604,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -3266,10 +5716,10 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.value = 
b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -3311,6 +5761,7 @@ STAGE PLANS: input vertices: 1 Map 2 Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true File Output Operator compressed: false Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE @@ -3391,6 +5842,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3504,6 +5956,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3619,6 +6072,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3726,6 +6180,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3847,6 +6302,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3987,6 +6443,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4100,6 +6557,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized 
Table and Supports Key Types IS true @@ -4210,6 +6668,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4292,6 +6751,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4418,6 +6878,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4529,17 +6990,24 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - Select Vectorization: - className: VectorSelectOperator - native: true - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + bigTableKeyExpressions: col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4677,6 +7145,7 @@ STAGE 
PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4773,7 +7242,167 @@ PREHOOK: query: explain vectorization only operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + 
native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 +PREHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t2_n87 +POSTHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization only operator +select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -4794,18 +7423,25 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - Filter Vectorization: - className: VectorFilterOperator - native: true - Map Join Vectorization: - className: 
VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4821,21 +7457,10 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - Filter Vectorization: - className: VectorFilterOperator + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator native: true - Select Vectorization: - className: VectorSelectOperator - native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4844,7 +7469,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -4896,13 +7521,13 @@ STAGE PLANS: Stage: Stage-0 Fetch 
Operator -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -4914,6 +7539,18 @@ POSTHOOK: Input: default@t3_n35 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -4922,11 +7559,11 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization only operator -select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only operator -select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -4939,23 +7576,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4964,17 +7606,25 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 3 + Map 4 Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4986,22 +7636,46 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 5 Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5014,6 +7688,37 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 
2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -5022,23 +7727,37 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator + limit: -1 + Processor Tree: + ListSink -PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -5070,10 +7789,13 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization only operator +NULL +NULL +NULL +PREHOOK: query: explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only operator +POSTHOOK: query: explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -5087,18 +7809,28 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 
2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5112,12 +7844,20 @@ STAGE PLANS: vectorized: true Map 4 Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5131,20 +7871,44 @@ STAGE PLANS: vectorized: true Map 5 Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -5157,6 +7921,36 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                outputColumnNames: _col0, _col5
+                Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: _col5 is not null (type: boolean)
+                  Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Left Semi Join 0 to 1
+                    keys:
+                      0 _col5 (type: int)
+                      1 _col0 (type: int)
+                    outputColumnNames: _col0
+                    input vertices:
+                      1 Map 5
+                    Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -5166,57 +7960,30 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
             Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
+                Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
+                  Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink

-PREHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1_n148
-PREHOOK: Input: default@t2_n87
-PREHOOK: Input: default@t3_n35
-#### A masked pattern was here ####
-POSTHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1_n148
-POSTHOOK: Input: default@t2_n87
-POSTHOOK: Input: default@t3_n35
-#### A masked pattern was here ####
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-10
-10
-10
-10
-4
-4
-8
-8
-NULL
-NULL
-NULL
 PREHOOK: query: explain vectorization only operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
@@ -5235,17 +8002,32 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
       Vertices:
         Map 1 
            Map Operator Tree:
                TableScan Vectorization:
                    native: true
-               Reduce Sink Vectorization:
-                   className: VectorReduceSinkLongOperator
+               Filter Vectorization:
+                   className: VectorFilterOperator
                    native: true
-                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+               Map Join Vectorization:
+                   bigTableKeyExpressions: col 0:int
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                   nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+               Map Join Vectorization:
+                   bigTableKeyExpressions: col 0:int
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+                   nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkObjectHashOperator
+                   native: true
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -5254,25 +8036,28 @@ STAGE PLANS:
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: true
+               allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-        Map 4 
+        Map 3 
            Map Operator Tree:
                TableScan Vectorization:
                    native: true
-               Select Vectorization:
-                   className: VectorSelectOperator
+               Filter Vectorization:
+                   className: VectorFilterOperator
                    native: true
-               Group By Vectorization:
-                   className: VectorGroupByOperator
-                   groupByMode: HASH
-                   native: false
-                   vectorProcessingMode: HASH
-               Reduce Sink Vectorization:
-                   className: VectorReduceSinkLongOperator
-                   native: true
-                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+               Group By Vectorization:
+                   className: VectorGroupByOperator
+                   groupByMode: HASH
+                   native: false
+                   vectorProcessingMode: HASH
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkLongOperator
+                   native: true
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -5284,7 +8069,7 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-        Map 5 
+        Map 4 
            Map Operator Tree:
                TableScan Vectorization:
                    native: true
@@ -5304,7 +8089,6 @@ STAGE PLANS:
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2 
-        Reducer 3 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
@@ -5365,10 +8149,10 @@ POSTHOOK: Input: default@t3_n35
 8
 8
 PREHOOK: query: explain vectorization only operator
-select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
 POSTHOOK: query: explain vectorization only operator
-select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -5382,17 +8166,27 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Map 1 <- Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
       Vertices:
         Map 1 
            Map Operator Tree:
                TableScan Vectorization:
                    native: true
-               Reduce Sink Vectorization:
-                   className: VectorReduceSinkLongOperator
+               Filter Vectorization:
+                   className: VectorFilterOperator
                    native: true
-                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+               Map Join Vectorization:
+                   bigTableKeyExpressions: col 0:int
+                   className: VectorMapJoinOperator
+                   native: false
+                   nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                   nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkLongOperator
+                   native: true
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -5401,25 +8195,28 @@ STAGE PLANS:
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-               allNative: true
+               allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Map 4 
            Map Operator Tree:
                TableScan Vectorization:
                    native: true
-               Select Vectorization:
-                   className: VectorSelectOperator
+               Filter Vectorization:
+                   className: VectorFilterOperator
                    native: true
-               Group By Vectorization:
-                   className: VectorGroupByOperator
-                   groupByMode: HASH
-                   native: false
-                   vectorProcessingMode: HASH
-               Reduce Sink Vectorization:
-                   className: VectorReduceSinkLongOperator
-                   native: true
-                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+               Group By Vectorization:
+                   className: VectorGroupByOperator
+                   groupByMode: HASH
+                   native: false
+                   vectorProcessingMode: HASH
+               Reduce Sink Vectorization:
+                   className: VectorReduceSinkLongOperator
+                   native: true
+                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -5451,6 +8248,9 @@ STAGE PLANS:
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2 
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3 
            Execution mode: vectorized, llap
            Reduce Vectorization:
@@ -5470,13 +8270,13 @@ STAGE PLANS:
   Stage: Stage-0
     Fetch Operator
-PREHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+PREHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1_n148
 PREHOOK: Input: default@t2_n87
 PREHOOK: Input: default@t3_n35
 #### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t2_n87
 POSTHOOK: Input: default@t3_n35
 #### A masked pattern was here ####
@@ -5504,12 +8304,23 @@ POSTHOOK: Input: default@t3_n35
 10
 10
 10
+10
+10
+10
+10
+10
+10
+10
+10
+2
 4
 4
+5
+5
+5
 8
 8
-NULL
-NULL
+9
 NULL
 NULL
 NULL
@@ -5531,17 +8342,27 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Map 1 <- Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
      Vertices:
        Map 1 
           Map Operator Tree:
              TableScan Vectorization:
                  native: true
-             Reduce Sink Vectorization:
-                 className: VectorReduceSinkLongOperator
+             Filter Vectorization:
+                 className: VectorFilterOperator
                  native: true
-                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+             Map Join Vectorization:
+                 bigTableKeyExpressions: col 0:int
+                 className: VectorMapJoinOperator
+                 native: false
+                 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                 nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+             Reduce Sink Vectorization:
+                 className: VectorReduceSinkLongOperator
+                 native: true
+                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
           Execution mode: vectorized, llap
           LLAP IO: all inputs
           Map Vectorization:
@@ -5550,25 +8371,28 @@ STAGE PLANS:
              inputFormatFeatureSupport: [DECIMAL_64]
              featureSupportInUse: [DECIMAL_64]
              inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-             allNative: true
+             allNative: false
              usesVectorUDFAdaptor: false
              vectorized: true
        Map 4 
           Map Operator Tree:
              TableScan Vectorization:
                  native: true
-             Select Vectorization:
-                 className: VectorSelectOperator
+             Filter Vectorization:
+                 className: VectorFilterOperator
                  native: true
-             Group By Vectorization:
-                 className: VectorGroupByOperator
-                 groupByMode: HASH
-                 native: false
-                 vectorProcessingMode: HASH
-             Reduce Sink Vectorization:
-                 className: VectorReduceSinkLongOperator
-                 native: true
-                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+             Select Vectorization:
+                 className: VectorSelectOperator
+                 native: true
+             Group By Vectorization:
+                 className: VectorGroupByOperator
+                 groupByMode: HASH
+                 native: false
+                 vectorProcessingMode: HASH
+             Reduce Sink Vectorization:
+                 className: VectorReduceSinkLongOperator
+                 native: true
+                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
           Execution mode: vectorized, llap
           LLAP IO: all inputs
           Map Vectorization:
@@ -5600,6 +8424,9 @@ STAGE PLANS:
              usesVectorUDFAdaptor: false
              vectorized: true
        Reducer 2 
+           MergeJoin Vectorization:
+               enabled: false
+               enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
        Reducer 3 
           Execution mode: vectorized, llap
           Reduce Vectorization:
@@ -5619,60 +8446,6 @@ STAGE PLANS:
   Stage: Stage-0
     Fetch Operator
-PREHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1_n148
-PREHOOK: Input: default@t2_n87
-PREHOOK: Input: default@t3_n35
-#### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1_n148
-POSTHOOK: Input: default@t2_n87
-POSTHOOK: Input: default@t3_n35
-#### A masked pattern was here ####
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-10
-10
-10
-10
-10
-10
-10
-10
-10
-10
-10
-10
-2
-4
-4
-5
-5
-5
-8
-8
-9
-NULL
-NULL
-NULL
 PREHOOK: query: explain vectorization only operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
@@ -5702,11 +8475,13 @@ STAGE PLANS:
                  className: VectorFilterOperator
                  native: true
              Map Join Vectorization:
+                 bigTableKeyExpressions: col 0:int
                  className: VectorMapJoinOperator
                  native: false
                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
                  nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
              Map Join Vectorization:
+                 bigTableKeyExpressions: col 1:string
                  className: VectorMapJoinOperator
                  native: false
                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
@@ -5872,6 +8647,7 @@ STAGE PLANS:
                  className: VectorSelectOperator
                  native: true
              Map Join Vectorization:
+                 bigTableKeyExpressions: col 1:string
                  className: VectorMapJoinOperator
                  native: false
                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -5934,10 +8710,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2_n87
 POSTHOOK: Input: default@t3_n35
 #### A masked pattern was here ####
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -5965,12 +8741,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -5981,7 +8755,6 @@ STAGE PLANS:
                        1 _col0 (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 0:int, col 1:string
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -5990,15 +8763,14 @@ STAGE PLANS:
                      input vertices:
                        1 Map 3
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkObjectHashOperator
-                           keyColumnNums: [0, 1]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                        Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6011,12 +8783,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 3 
            Map Operator Tree:
                TableScan
@@ -6025,12 +8791,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -6039,16 +8803,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0]
                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -6059,10 +8820,8 @@ STAGE PLANS:
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6075,27 +8834,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               reduceColumnNullOrder: aa
-               reduceColumnSortOrder: ++
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -6103,7 +8849,6 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [0, 1]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -6138,10 +8883,10 @@ POSTHOOK: Input: default@t2_n87
 10	val_10
 4	val_4
 8	val_8
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -6169,12 +8914,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -6185,7 +8928,6 @@ STAGE PLANS:
                        1 _col0 (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 0:int, col 1:string
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -6194,15 +8936,14 @@ STAGE PLANS:
                      input vertices:
                        1 Map 3
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkObjectHashOperator
-                           keyColumnNums: [0, 1]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                        Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6215,12 +8956,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 3 
            Map Operator Tree:
                TableScan
@@ -6229,12 +8964,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -6243,16 +8976,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0]
                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -6263,10 +8993,8 @@ STAGE PLANS:
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6279,27 +9007,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               reduceColumnNullOrder: aa
-               reduceColumnSortOrder: ++
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -6307,7 +9022,6 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [0, 1]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -6344,10 +9058,10 @@ POSTHOOK: Input: default@t2_n87
 10	val_5
 4	val_2
 8	val_4
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -6375,12 +9089,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -6391,7 +9103,6 @@ STAGE PLANS:
                        1 _col0 (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 0:int, col 1:string
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -6400,15 +9111,14 @@ STAGE PLANS:
                      input vertices:
                        1 Map 3
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkObjectHashOperator
-                           keyColumnNums: [0, 1]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                        Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6421,12 +9131,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 3 
            Map Operator Tree:
                TableScan
@@ -6435,12 +9139,10 @@ STAGE PLANS:
                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -6449,16 +9151,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0]
                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -6469,10 +9168,8 @@ STAGE PLANS:
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6485,27 +9182,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               reduceColumnNullOrder: aa
-               reduceColumnSortOrder: ++
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -6513,7 +9197,6 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [0, 1]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -6542,10 +9225,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t4_n19
 #### A masked pattern was here ####
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -6573,12 +9256,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -6589,7 +9270,6 @@ STAGE PLANS:
                        1 _col1 (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 1:string
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -6598,23 +9278,21 @@ STAGE PLANS:
                      input vertices:
                        1 Map 3
                      Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Select Operator
                        expressions: _col1 (type: string)
                        outputColumnNames: _col0
                        Select Vectorization:
                            className: VectorSelectOperator
                            native: true
-                           projectedOutputColumnNums: [0]
                        Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          sort order: +
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkObjectHashOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6627,12 +9305,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 3 
            Map Operator Tree:
                TableScan
@@ -6641,12 +9313,10 @@ STAGE PLANS:
                  Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: FilterLongColLessLongScalar(col 0:int, val 15)
                    predicate: (key < 15) (type: boolean)
                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -6655,16 +9325,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0]
                      Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int, col 0:int
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col1 (type: int), _col1 (type: int)
                        mode: hash
                        outputColumnNames: _col0, _col1
@@ -6675,10 +9342,8 @@ STAGE PLANS:
                          Map-reduce partition columns: _col1 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [1]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6691,27 +9356,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               reduceColumnNullOrder: a
-               reduceColumnSortOrder: +
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 1
-                  dataColumns: KEY.reducesinkkey0:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
@@ -6719,7 +9371,6 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [0]
                Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -6759,10 +9410,10 @@ val_5
 val_5
 val_8
 val_9
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -6790,12 +9441,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -6806,7 +9455,6 @@ STAGE PLANS:
                        1 _col0 (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 0:int, col 1:string
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -6815,15 +9463,14 @@ STAGE PLANS:
                      input vertices:
                        1 Map 3
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkObjectHashOperator
-                           keyColumnNums: [0, 1]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                        Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6836,12 +9483,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 3 
            Map Operator Tree:
                TableScan
@@ -6850,12 +9491,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1:string, val val_10), SelectColumnIsNotNull(col 0:int))
                    predicate: ((value < 'val_10') and key is not null) (type: boolean)
                    Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -6864,16 +9503,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0, 1]
                      Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int, col 1:string
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col0 (type: int), _col1 (type: string)
                        mode: hash
                        outputColumnNames: _col0, _col1
@@ -6884,10 +9520,8 @@ STAGE PLANS:
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -6900,27 +9534,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               reduceColumnNullOrder: aa
-               reduceColumnSortOrder: ++
               allNative: false
               usesVectorUDFAdaptor: false
              vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -6928,7 +9549,6 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [0, 1]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -6960,10 +9580,10 @@ POSTHOOK: Input: default@t2_n87
 0	val_0
 0	val_0
 0	val_0
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -6991,12 +9611,10 @@ STAGE PLANS:
                  Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 5)
                    predicate: (key > 5) (type: boolean)
                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -7005,16 +9623,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0]
                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -7025,10 +9640,8 @@ STAGE PLANS:
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7041,12 +9654,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 2 
            Map Operator Tree:
                TableScan
@@ -7055,12 +9662,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -7071,7 +9676,6 @@ STAGE PLANS:
                        1 _col0 (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 1:string
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -7080,23 +9684,21 @@ STAGE PLANS:
                      input vertices:
                        1 Map 1
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Select Operator
                        expressions: _col1 (type: string)
                        outputColumnNames: _col0
                        Select Vectorization:
                            className: VectorSelectOperator
                            native: true
-                           projectedOutputColumnNums: [0]
                        Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          sort order: +
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkObjectHashOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7109,27 +9711,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Reducer 3 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               reduceColumnNullOrder: a
-               reduceColumnSortOrder: +
               allNative: false
              usesVectorUDFAdaptor: false
              vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 1
-                  dataColumns: KEY.reducesinkkey0:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
@@ -7137,7 +9726,6 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [0]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -7169,10 +9757,10 @@ POSTHOOK: Input: default@t3_n35
 val_10
 val_8
 val_9
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -7200,12 +9788,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 5), FilterStringGroupColLessEqualStringScalar(col 1:string, val val_20))
                    predicate: ((key > 5) and (value <= 'val_20')) (type: boolean)
                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -7214,16 +9800,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0, 1]
                      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int, col 1:string
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col0 (type: int), _col1 (type: string)
                        mode: hash
                        outputColumnNames: _col0, _col1
@@ -7234,10 +9817,8 @@ STAGE PLANS:
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7250,12 +9831,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 2 
            Map Operator Tree:
                TableScan
@@ -7264,12 +9839,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -7280,7 +9853,6 @@ STAGE PLANS:
                        1 _col0 (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 1:string
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -7289,23 +9861,21 @@ STAGE PLANS:
                      input vertices:
                        1 Map 1
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Select Operator
                        expressions: _col1 (type: string)
                        outputColumnNames: _col0
                        Select Vectorization:
                            className: VectorSelectOperator
                            native: true
-                           projectedOutputColumnNums: [0]
                        Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          sort order: +
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkObjectHashOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7318,27 +9888,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Reducer 3 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               reduceColumnNullOrder: a
-               reduceColumnSortOrder: +
               allNative: false
              usesVectorUDFAdaptor: false
              vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 1
-                  dataColumns: KEY.reducesinkkey0:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
@@ -7346,7 +9903,6 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [0]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -7375,10 +9931,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t2_n87
 #### A masked pattern was here ####
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -7406,12 +9962,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2)
                    predicate: (key > 2) (type: boolean)
                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -7420,16 +9974,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0]
                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -7440,10 +9991,8 @@ STAGE PLANS:
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7456,12 +10005,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 2 
            Map Operator Tree:
                TableScan
@@ -7470,12 +10013,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -7486,7 +10027,6 @@ STAGE PLANS:
                        1 _col0 (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 0:int, col 1:string
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -7495,15 +10035,14 @@ STAGE PLANS:
                      input vertices:
                        1 Map 1
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkObjectHashOperator
-                           keyColumnNums: [0, 1]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                        Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7516,27 +10055,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Reducer 3 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               reduceColumnNullOrder: aa
-               reduceColumnSortOrder: ++
               allNative: false
              usesVectorUDFAdaptor: false
              vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -7544,7 +10070,6 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [0, 1]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -7578,10 +10103,10 @@ POSTHOOK: Input: default@t2_n87
 10	val_5
 4	val_2
 8	val_4
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -7609,12 +10134,10 @@ STAGE PLANS:
                  Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -7625,7 +10148,6 @@ STAGE PLANS:
                        1 _col0 (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 0:int
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -7634,15 +10156,14 @@ STAGE PLANS:
                      input vertices:
                        1 Map 3
                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkObjectHashOperator
-                           keyColumnNums: [0]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                        Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7655,12 +10176,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 3 
            Map Operator Tree:
                TableScan
@@ -7669,12 +10184,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -7683,16 +10196,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0]
                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -7703,10 +10213,8 @@ STAGE PLANS:
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7719,27 +10227,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               reduceColumnNullOrder: a
-               reduceColumnSortOrder: +
               allNative: false
              usesVectorUDFAdaptor: false
              vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 1
-                  dataColumns: KEY.reducesinkkey0:int
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
@@ -7747,7 +10242,6 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                   projectedOutputColumnNums: [0]
                Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -7795,10 +10289,10 @@ POSTHOOK: Input: default@t3_n35
 8
 8
 9
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -7826,12 +10320,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 0:int)
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
@@ -7842,7 +10334,6 @@ STAGE PLANS:
                        1 (2 * _col0) (type: int)
                      Map Join Vectorization:
                          bigTableKeyExpressions: col 0:int
-                         bigTableValueExpressions: col 0:int, col 1:string
                          className: VectorMapJoinOperator
                          native: false
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -7851,15 +10342,14 @@ STAGE PLANS:
                      input vertices:
                        1 Map 3
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                     HybridGraceHashJoin: true
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: string)
                        sort order: ++
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkObjectHashOperator
-                           keyColumnNums: [0, 1]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                        Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7872,12 +10362,6 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0, 1]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: []
         Map 3 
            Map Operator Tree:
                TableScan
@@ -7886,12 +10370,10 @@ STAGE PLANS:
                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                  TableScan Vectorization:
                      native: true
-                     vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                  Filter Operator
                    Filter Vectorization:
                        className: VectorFilterOperator
                        native: true
-                       predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int)
                    predicate: (2 * key) is not null (type: boolean)
                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -7900,16 +10382,13 @@ STAGE PLANS:
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0]
                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
                            groupByMode: HASH
-                           keyExpressions: col 0:int
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: []
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
@@ -7920,11 +10399,8 @@ STAGE PLANS:
                          Map-reduce partition columns: (2 * _col0) (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                             keyColumnNums: [1]
-                             keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                             valueColumnNums: []
                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -7937,27 +10413,14 @@ STAGE PLANS:
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-               rowBatchContext:
-                   dataColumnCount: 2
-                   includeColumns: [0]
-                   dataColumns: key:int, value:string
-                   partitionColumnCount: 0
-                   scratchColumnTypeNames: [bigint]
         Reducer 2 
            Execution mode:
vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -7965,7 +10428,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -7998,10 +10460,10 @@ POSTHOOK: Input: default@t2_n87 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -8029,53 +10491,62 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: 
-                              keyColumnNums: [0, 1]
+                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Map Join Operator
+                        condition map:
+                             Left Semi Join 0 to 1
+                        keys:
+                          0 _col5 (type: int)
+                          1 _col0 (type: int)
+                        Map Join Vectorization:
+                            bigTableKeyExpressions: col 2:int
+                            className: VectorMapJoinOperator
+                            native: false
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                            nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                        outputColumnNames: _col0, _col1, _col5, _col6
+                        input vertices:
+                          1 Map 4
+                        Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                        HybridGraceHashJoin: true
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+                          outputColumnNames: _col0, _col1, _col2, _col3
+                          Select Vectorization:
+                              className: VectorSelectOperator
                              native: true
-                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: [2, 3]
-                          Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col2 (type: int), _col3 (type: string)
+                          Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int), _col1 (type: string)
+                            sort order: ++
+                            Reduce Sink Vectorization:
+                                className: VectorReduceSinkObjectHashOperator
+                                native: true
+                                nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col2 (type: int), _col3 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -8087,12 +10558,6 @@ STAGE PLANS:
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0, 1]
-                  dataColumns: key:int, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: [string]
         Map 3
             Map Operator Tree:
                 TableScan
@@ -8101,12 +10566,10 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
@@ -8115,10 +10578,8 @@ STAGE PLANS:
                       Map-reduce partition columns: key (type: int)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkLongOperator
-                          keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [1]
                       Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                       value expressions: value (type: string)
             Execution mode: vectorized, llap
@@ -8132,12 +10593,6 @@ STAGE PLANS:
               allNative: true
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0, 1]
-                  dataColumns: key:int, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
         Map 4
             Map Operator Tree:
                 TableScan
@@ -8146,12 +10601,10 @@ STAGE PLANS:
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -8160,16 +10613,13 @@ STAGE PLANS:
                       Select Vectorization:
                          className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 0:int
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -8180,10 +10630,8 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: int)
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkLongOperator
-                              keyColumnNums: [0]
                              native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                           Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -8196,27 +10644,14 @@ STAGE PLANS:
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0]
-                  dataColumns: key:int, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
         Reducer 2
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: aa
-                reduceColumnSortOrder: ++
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 4
-                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string)
@@ -8224,14 +10659,13 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0, 1, 2, 3]
-                Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -8269,10 +10703,10 @@ POSTHOOK: Input: default@t3_n35
 10	val_10	10	val_5
 4	val_4	4	val_2
 8	val_8	8	val_4
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -8300,12 +10734,10 @@ STAGE PLANS:
                   Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string))
                     predicate: (key is not null and value is not null) (type: boolean)
                     Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -8316,7 +10748,6 @@ STAGE PLANS:
                         1 _col0 (type: int), _col1 (type: string)
                       Map Join Vectorization:
                           bigTableKeyExpressions: col 0:int, col 1:string
-                          bigTableValueExpressions: col 0:int, col 1:string
                          className: VectorMapJoinOperator
                           native: false
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -8325,15 +10756,14 @@ STAGE PLANS:
                       input vertices:
                         1 Map 3
                       Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
                            native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
                         Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -8346,12 +10776,6 @@ STAGE PLANS:
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0, 1]
-                  dataColumns: key:int, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
         Map 3
             Map Operator Tree:
                 TableScan
@@ -8360,12 +10784,10 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string))
                     predicate: (key is not null and value is not null) (type: boolean)
                     Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -8374,16 +10796,13 @@ STAGE PLANS:
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0, 1]
                       Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 0:int, col 1:string
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
                         keys: _col0 (type: int), _col1 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1
@@ -8394,10 +10813,8 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkMultiKeyOperator
-                              keyColumnNums: [0, 1]
                              native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                           Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -8410,27 +10827,14 @@ STAGE PLANS:
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0, 1]
-                  dataColumns: key:int, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
         Reducer 2
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: aa
-                reduceColumnSortOrder: ++
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
              Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -8438,7 +10842,6 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0, 1]
                 Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
@@ -8481,10 +10884,10 @@ POSTHOOK: Input: default@t3_n35
 5	val_5
 8	val_8
 9	val_9
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -8512,44 +10915,54 @@ STAGE PLANS:
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Left Semi Join 0 to 1
-                           Left Semi Join 0 to 2
                       keys:
                         0 key (type: int)
                         1 _col0 (type: int)
-                        2 _col0 (type: int)
                       Map Join Vectorization:
bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8561,12 +10974,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -8575,12 +10982,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) 
predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -8589,16 +10994,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8609,10 +11011,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -8622,15 +11022,235 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 +PREHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t2_n87 +POSTHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization operator +select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left 
Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -8639,12 +11259,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -8653,16 +11271,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic 
stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8673,10 +11288,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -8689,27 +11302,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -8717,14 +11317,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8736,13 +11335,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -8754,6 +11353,18 @@ POSTHOOK: Input: default@t3_n35 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -8762,11 +11373,11 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3_n35 a left 
outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -8781,48 +11392,26 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8831,34 +11420,25 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: 
true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8870,51 +11450,46 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + 
Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8926,27 +11501,46 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -8954,14 +11548,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8973,13 +11566,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: 
select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -9011,10 +11604,13 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization detail +NULL +NULL +NULL +PREHOOK: query: explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -9030,7 +11626,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -9041,17 +11637,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9064,12 +11657,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -9077,17 +11664,14 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9100,51 +11684,46 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan 
alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9156,44 +11735,46 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join 
Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -9201,14 +11782,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -9261,10 +11841,10 @@ POSTHOOK: Input: default@t3_n35 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -9280,29 +11860,66 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -9280,29 +11860,66 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: a
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
-                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Semi Join 0 to 1
+                      keys:
+                        0 key (type: int)
+                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyExpressions: col 0:int
+                          className: VectorMapJoinOperator
+                          native: false
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Map Join Operator
+                        condition map:
+                             Left Outer Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 key (type: int)
+                        Map Join Vectorization:
+                            bigTableKeyExpressions: col 0:int
+                            className: VectorMapJoinOperator
+                            native: false
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+                            nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                        outputColumnNames: _col0
+                        input vertices:
+                          1 Map 4
+                        Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                        HybridGraceHashJoin: true
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkObjectHashOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -9311,54 +11928,49 @@ STAGE PLANS:
                 inputFormatFeatureSupport: [DECIMAL_64]
                 featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
+                allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Map 4 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: b
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        projectedOutputColumnNums: [0]
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:int
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: _col0 (type: int)
-                      mode: hash
+                    predicate: key is not null (type: boolean)
+                    Select Operator
+                      expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
                         Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -9370,30 +11982,21 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: c
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -9406,44 +12009,14 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                     Left Outer Join 0 to 2
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
@@ -9451,14 +12024,13 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -9511,11 +12083,11 @@ POSTHOOK: Input: default@t3_n35
 4
 8
 8
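In the updated plan above, both join legs run as broadcast map joins inside Map 1, and the Group By Operator on the small side deduplicates b's keys before the semi join. That deduplication is what makes LEFT SEMI JOIN behave like EXISTS; a sketch of the equivalence (illustration only, not part of the patch):

-- Each row of a appears at most once, regardless of duplicate keys in b.
SELECT a.key
FROM t3_n35 a
WHERE EXISTS (SELECT 1 FROM t2_n87 b WHERE b.key = a.key);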
-PREHOOK: query: explain vectorization detail
-select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization operator
+select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization operator
+select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -9530,7 +12102,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Map 1 <- Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -9538,21 +12111,42 @@
             Map Operator Tree:
                 TableScan
                   alias: a
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
-                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Semi Join 0 to 1
+                      keys:
+                        0 key (type: int)
+                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyExpressions: col 0:int
+                          className: VectorMapJoinOperator
+                          native: false
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 4
+                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -9561,54 +12155,49 @@ STAGE PLANS:
                 inputFormatFeatureSupport: [DECIMAL_64]
                 featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
+                allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        projectedOutputColumnNums: [0]
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:int
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: _col0 (type: int)
-                      mode: hash
+                    Select Operator
+                      expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
                         Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -9620,12 +12209,6 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -9633,17 +12216,14 @@
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -9656,44 +12236,32 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Left Semi Join 0 to 1
-                     Right Outer Join 0 to 2
+                     Full Outer Join 0 to 1
                 keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
+                  0 _col0 (type: int)
+                  1 key (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                MergeJoin Vectorization:
+                    enabled: false
+                    enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
@@ -9701,14 +12269,13 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -9720,13 +12287,13 @@ STAGE PLANS:
       Processor Tree:
         ListSink
-PREHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+PREHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1_n148
 PREHOOK: Input: default@t2_n87
 PREHOOK: Input: default@t3_n35
 #### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t2_n87
@@ -9754,19 +12321,30 @@ POSTHOOK: Input: default@t3_n35
 10
 10
 10
+10
+10
+10
+10
+10
+10
+10
+10
+2
 4
 4
+5
+5
+5
 8
 8
+9
 NULL
 NULL
 NULL
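The full outer join above stays a Merge Join in Reducer 2, and its MergeJoin Vectorization block reports enabled: false. The Map Join Vectorization blocks likewise list hive.vectorized.execution.mapjoin.native.enabled IS false under nativeConditionsNotMet, so this run goes through the row-mode VectorMapJoinOperator bridge. A hedged sketch of the toggle named in the plan output (illustration only):

SET hive.vectorized.execution.mapjoin.native.enabled=true;
-- With that condition met, later plans in this file show the native
-- VectorMapJoinLeftSemiLongOperator instead of VectorMapJoinOperator.
EXPLAIN VECTORIZATION OPERATOR
SELECT a.key
FROM t3_n35 a
LEFT SEMI JOIN t1_n148 b ON a.key = b.key
FULL OUTER JOIN t2_n87 c ON a.key = c.key
SORT BY a.key;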
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -9782,7 +12360,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Map 1 <- Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -9790,21 +12369,42 @@
             Map Operator Tree:
                 TableScan
                   alias: a
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
-                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Semi Join 0 to 1
+                      keys:
+                        0 key (type: int)
+                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          bigTableKeyExpressions: col 0:int
+                          className: VectorMapJoinOperator
+                          native: false
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                          nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 4
+                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -9813,54 +12413,49 @@ STAGE PLANS:
                 inputFormatFeatureSupport: [DECIMAL_64]
                 featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
+                allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        projectedOutputColumnNums: [0]
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:int
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: _col0 (type: int)
-                      mode: hash
+                    Select Operator
+                      expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
                         Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -9872,12 +12467,6 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -9885,17 +12474,14 @@
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -9908,44 +12494,32 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
              Merge Join Operator
                condition map:
-                     Left Semi Join 0 to 1
-                     Outer Join 0 to 2
+                     Full Outer Join 0 to 1
                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
+                  0 _col0 (type: int)
+                  1 key (type: int)
                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                MergeJoin Vectorization:
+                    enabled: false
+                    enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
@@ -9953,14 +12527,13 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -10026,10 +12599,10 @@ POSTHOOK: Input: default@t3_n35
 NULL
 NULL
 NULL
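The next case joins the outer leg on the string column value rather than the int key, so the reduce sinks in its plan switch from VectorReduceSinkLongOperator to VectorReduceSinkStringOperator and the big-table key expression becomes col 1:string. The new HybridGraceHashJoin: true lines indicate the hybrid grace hash table is in effect; a sketch, assuming the usual knob name hive.mapjoin.hybridgrace.hashtable (not spelled out in this hunk):

SET hive.mapjoin.hybridgrace.hashtable=true;  -- assumed setting behind 'HybridGraceHashJoin: true'
EXPLAIN VECTORIZATION OPERATOR
SELECT a.key
FROM t3_n35 a
LEFT SEMI JOIN t2_n87 b ON a.key = b.key
LEFT OUTER JOIN t1_n148 c ON a.value = c.value
SORT BY a.key;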
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -10057,12 +12630,10 @@ STAGE PLANS:
                   Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -10073,7 +12644,6 @@
                         1 _col0 (type: int)
                       Map Join Vectorization:
                           bigTableKeyExpressions: col 0:int
-                          bigTableValueExpressions: col 0:int, col 1:string
                           className: VectorMapJoinOperator
                           native: false
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -10082,6 +12652,7 @@
                       input vertices:
                         1 Map 3
                       Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
                       Map Join Operator
                         condition map:
                              Left Outer Join 0 to 1
@@ -10090,7 +12661,6 @@
                           1 value (type: string)
                         Map Join Vectorization:
                             bigTableKeyExpressions: col 1:string
-                            bigTableValueExpressions: col 0:int
                             className: VectorMapJoinOperator
                             native: false
                             nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
@@ -10099,15 +12669,14 @@
                         input vertices:
                           1 Map 4
                         Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE
+                        HybridGraceHashJoin: true
                         Reduce Output Operator
                           key expressions: _col0 (type: int)
                           sort order: +
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkObjectHashOperator
-                              keyColumnNums: [0]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                           Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -10120,12 +12689,6 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0, 1]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -10134,12 +12697,10 @@
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -10148,16 +12709,13 @@
                       outputColumnNames: _col0
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 0:int
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -10168,10 +12726,8 @@
                          sort order: +
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                              keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -10184,12 +12740,6 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -10197,17 +12747,14 @@
                   Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: value (type: string)
                     sort order: +
                     Map-reduce partition columns: value (type: string)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkStringOperator
-                        keyColumnNums: [1]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -10220,27 +12767,14 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [1]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
@@ -10248,7 +12782,6 @@
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                    projectedOutputColumnNums: [0]
                Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -10314,10 +12847,10 @@ POSTHOOK: Input: default@t3_n35
 4
 8
 8
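In the case below, the scan of a carries the combined predicate ((key > 100) and value is not null): the user's WHERE clause plus the implicit null filter on the semi-join key. Written out (illustration only, not part of the patch):

SELECT a.key
FROM t3_n35 a
LEFT SEMI JOIN t2_n87 b ON a.value = b.value
WHERE a.key > 100;
-- 'value is not null' is implied by the equi-join condition;
-- '(key > 100)' comes straight from the WHERE clause.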
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -10344,12 +12877,10 @@ STAGE PLANS:
                   Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 100), SelectColumnIsNotNull(col 1:string))
                     predicate: ((key > 100) and value is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -10358,7 +12889,6 @@
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0, 1]
                       Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -10368,7 +12898,6 @@
                           1 _col0 (type: string)
                         Map Join Vectorization:
                             bigTableKeyExpressions: col 1:string
-                            bigTableValueExpressions: col 0:int
                             className: VectorMapJoinOperator
                             native: false
                             nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -10377,6 +12906,7 @@
                         input vertices:
                           1 Map 2
                         Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE
+                        HybridGraceHashJoin: true
                         File Output Operator
                           compressed: false
                           File Sink Vectorization:
@@ -10398,12 +12928,6 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0, 1]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -10412,12 +12936,10 @@
                   Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 1:string)
                     predicate: value is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -10426,16 +12948,13 @@
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [1]
                       Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 1:string
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
                         keys: _col0 (type: string)
                         mode: hash
                         outputColumnNames: _col0
@@ -10446,10 +12965,8 @@
                          Map-reduce partition columns: _col0 (type: string)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkStringOperator
-                              keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                          Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -10462,12 +12979,6 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [1]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
   Stage: Stage-0
     Fetch Operator
@@ -10485,10 +12996,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2_n87
 POSTHOOK: Input: default@t3_n35
 #### A masked pattern was here ####
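The remaining cases run the semi join natively: their plans show VectorMapJoinLeftSemiLongOperator with native: true, and nativeConditionsMet now includes hive.vectorized.execution.mapjoin.native.enabled IS true. The Long variant is selected because the single join key is an int. A sketch (illustration only):

SET hive.vectorized.execution.mapjoin.native.enabled=true;
EXPLAIN VECTORIZATION OPERATOR
SELECT * FROM t1_n148 a LEFT SEMI JOIN t2_n87 b ON a.key = b.key
SORT BY a.key, a.value;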
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -10516,12 +13027,10 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -10531,13 +13040,9 @@
                         0 key (type: int)
                         1 _col0 (type: int)
                       Map Join Vectorization:
-                          bigTableKeyColumnNums: [0]
-                          bigTableRetainedColumnNums: [0, 1]
-                          bigTableValueColumnNums: [0, 1]
                           className: VectorMapJoinLeftSemiLongOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                          projectedOutputColumnNums: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
@@ -10547,10 +13052,8 @@
                         sort order: ++
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
                         Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -10563,12 +13066,6 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0, 1]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -10577,12 +13074,10 @@
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -10591,16 +13086,13 @@
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 0:int
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -10611,10 +13103,8 @@
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                              keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -10627,27 +13117,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: aa
-                reduceColumnSortOrder: ++
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -10655,7 +13132,6 @@
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                    projectedOutputColumnNums: [0, 1]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -10690,10 +13166,10 @@ POSTHOOK: Input: default@t2_n87
 10 val_10
 4 val_4
 8 val_8
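A left semi join only ever projects the left table's columns, so swapping the two tables changes the output rows, as the result sets around this hunk show: t1_n148 on the left yields 10 val_10, 4 val_4, 8 val_8, while t2_n87 on the left (the next case) yields 10 val_5, 4 val_2, 8 val_4. Illustration only:

-- Only a's columns survive a LEFT SEMI JOIN; b acts purely as a filter.
SELECT * FROM t2_n87 a LEFT SEMI JOIN t1_n148 b ON b.key = a.key
SORT BY a.key, a.value;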
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -10721,12 +13197,10 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -10736,13 +13210,9 @@
                         0 key (type: int)
                         1 _col0 (type: int)
                       Map Join Vectorization:
-                          bigTableKeyColumnNums: [0]
-                          bigTableRetainedColumnNums: [0, 1]
-                          bigTableValueColumnNums: [0, 1]
                           className: VectorMapJoinLeftSemiLongOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                          projectedOutputColumnNums: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
@@ -10752,10 +13222,8 @@
                         sort order: ++
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
                         Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -10768,12 +13236,6 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0, 1]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -10782,12 +13244,10 @@
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -10796,16 +13256,13 @@
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 0:int
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -10816,10 +13273,8 @@
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                              keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -10832,27 +13287,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: aa
-                reduceColumnSortOrder: ++
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -10860,7 +13302,6 @@
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                    projectedOutputColumnNums: [0, 1]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -10897,10 +13338,10 @@ POSTHOOK: Input: default@t2_n87
 10 val_5
 4 val_2
 8 val_4
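The next case probes t4_n19, whose scan is estimated at a single row (Statistics: Num rows: 1 Data size: 4), and the query output block after the plan contains no result rows; a semi join against an empty build side filters out every probe row. Illustration only:

SELECT * FROM t1_n148 a LEFT SEMI JOIN t4_n19 b ON b.key = a.key
SORT BY a.key, a.value;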
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -10928,12 +13369,10 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -10943,13 +13382,9 @@
                         0 key (type: int)
                         1 _col0 (type: int)
                       Map Join Vectorization:
-                          bigTableKeyColumnNums: [0]
-                          bigTableRetainedColumnNums: [0, 1]
-                          bigTableValueColumnNums: [0, 1]
                           className: VectorMapJoinLeftSemiLongOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                          projectedOutputColumnNums: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
@@ -10959,10 +13394,8 @@
                         sort order: ++
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
                         Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -10975,12 +13408,6 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0, 1]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -10989,12 +13416,10 @@
                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -11003,16 +13428,13 @@
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 0:int
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -11023,10 +13445,8 @@
                          Map-reduce partition columns: _col0 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                              keyColumnNums: [0]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -11039,27 +13459,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: aa
-                reduceColumnSortOrder: ++
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -11067,7 +13474,6 @@
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                    projectedOutputColumnNums: [0, 1]
                Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -11096,10 +13502,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t4_n19
 #### A masked pattern was here ####
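In the case below, the literal '15' is compared against the int column key, so the optimizer folds the coercion and the small-table Filter Operator shows the numeric predicate (key < 15) rather than a string comparison. Illustration only:

-- '15' is coerced to int before the scan-side filter is formed.
SELECT a.value
FROM t1_n148 a
LEFT SEMI JOIN t3_n35 b ON (b.key = a.key AND b.key < '15')
SORT BY a.value;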
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -11127,12 +13533,10 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -11142,13 +13546,9 @@
                         0 key (type: int)
                         1 _col1 (type: int)
                       Map Join Vectorization:
-                          bigTableKeyColumnNums: [0]
-                          bigTableRetainedColumnNums: [1]
-                          bigTableValueColumnNums: [1]
                           className: VectorMapJoinLeftSemiLongOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                          projectedOutputColumnNums: [1]
                       outputColumnNames: _col1
                       input vertices:
                         1 Map 3
@@ -11159,17 +13559,14 @@
                         Select Vectorization:
                             className: VectorSelectOperator
                             native: true
-                            projectedOutputColumnNums: [1]
                         Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: string)
                           sort order: +
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkObjectHashOperator
-                              keyColumnNums: [1]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                           Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -11182,12 +13579,6 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0, 1]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -11196,12 +13587,10 @@
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterLongColLessLongScalar(col 0:int, val 15)
                     predicate: (key < 15) (type: boolean)
                     Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -11210,16 +13599,13 @@
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 0:int, col 0:int
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
                         keys: _col1 (type: int), _col1 (type: int)
                         mode: hash
                         outputColumnNames: _col0, _col1
@@ -11230,10 +13616,8 @@
                          Map-reduce partition columns: _col1 (type: int)
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkLongOperator
-                              keyColumnNums: [1]
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                          Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -11246,27 +13630,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string)
@@ -11274,7 +13645,6 @@
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                    projectedOutputColumnNums: [0]
                Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
@@ -11314,10 +13684,10 @@ val_5
 val_5
 val_8
 val_9
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -11345,12 +13715,10 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -11360,13 +13728,9 @@
                         0 key (type: int)
                         1 _col0 (type: int)
                       Map Join Vectorization:
-                          bigTableKeyColumnNums: [0]
-                          bigTableRetainedColumnNums: [0, 1]
-                          bigTableValueColumnNums: [0, 1]
                           className: VectorMapJoinLeftSemiLongOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                          projectedOutputColumnNums: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
@@ -11376,10 +13740,8 @@
                         sort order: ++
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys
IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11392,12 +13754,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -11406,12 +13762,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1:string, val val_10), SelectColumnIsNotNull(col 0:int)) predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -11420,16 +13774,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -11440,10 +13791,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11456,27 +13805,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -11484,7 +13820,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11516,10 +13851,10 @@ POSTHOOK: Input: default@t2_n87 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1_n148 
a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -11547,12 +13882,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 5) predicate: (key > 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -11561,16 +13894,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11581,10 +13911,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11597,12 +13925,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -11611,12 +13933,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -11626,13 +13946,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] outputColumnNames: _col1 input vertices: 1 Map 1 @@ -11643,17 +13959,14 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num 
rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11666,27 +13979,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -11694,7 +13994,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11726,10 +14025,10 @@ POSTHOOK: Input: default@t3_n35 val_10 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -11757,12 +14056,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 5), FilterStringGroupColLessEqualStringScalar(col 1:string, val val_20)) predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -11771,16 +14068,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -11791,10 +14085,8 @@ STAGE PLANS: 
Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11807,12 +14099,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -11821,12 +14107,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -11836,13 +14120,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] outputColumnNames: _col1 input vertices: 1 Map 1 @@ -11853,17 +14133,14 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11876,27 +14153,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select 
Operator expressions: KEY.reducesinkkey0 (type: string) @@ -11904,7 +14168,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11933,10 +14196,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -11964,12 +14227,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (key > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -11978,16 +14239,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11998,10 +14256,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12014,12 +14270,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -12028,12 +14278,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12043,13 +14291,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: 
[0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 1 @@ -12059,10 +14303,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12075,27 +14317,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -12103,7 +14332,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -12137,10 +14365,10 @@ POSTHOOK: Input: default@t2_n87 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -12168,12 +14396,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12183,13 +14409,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: 1 Map 3 @@ -12199,10 +14421,8 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12215,12 +14435,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -12229,12 +14443,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -12243,16 +14455,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -12263,10 +14472,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12279,27 +14486,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -12307,7 
+14501,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -12355,10 +14548,10 @@ POSTHOOK: Input: default@t3_n35 8 8 9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1_n148 a left semi join t2_n87 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -12386,12 +14579,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12401,13 +14592,9 @@ STAGE PLANS: 0 key (type: int) 1 (2 * _col0) (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -12417,10 +14604,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12433,12 +14618,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -12447,12 +14626,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) predicate: (2 * key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -12461,16 +14638,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic 
stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -12481,11 +14655,8 @@ STAGE PLANS: Map-reduce partition columns: (2 * _col0) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] - keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12498,27 +14669,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -12526,7 +14684,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -12559,10 +14716,10 @@ POSTHOOK: Input: default@t2_n87 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -12590,53 +14747,56 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12645,15 +14805,9 @@ STAGE PLANS: inputFormatFeatureSupport: 
[DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [string] Map 3 Map Operator Tree: TableScan @@ -12662,12 +14816,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -12676,10 +14828,8 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap @@ -12693,12 +14843,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -12707,12 +14851,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -12721,16 +14863,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -12741,10 +14880,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12757,27 +14894,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - 
includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -12785,14 +14909,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -12830,10 +14953,10 @@ POSTHOOK: Input: default@t3_n35 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -12861,12 +14984,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12876,13 +14997,9 @@ STAGE PLANS: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0, 1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -12892,10 +15009,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - 
keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12908,12 +15023,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -12922,12 +15031,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -12936,16 +15043,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -12956,10 +15060,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12972,27 +15074,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -13000,7 +15089,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column 
stats: NONE File Output Operator compressed: false @@ -13043,10 +15131,10 @@ POSTHOOK: Input: default@t3_n35 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -13074,44 +15162,48 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 - Left Semi Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13120,15 +15212,9 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -13137,12 +15223,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -13151,16 +15235,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13171,10 +15252,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -13184,15 +15263,229 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: 
NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1_n148 +PREHOOK: Input: default@t2_n87 +PREHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1_n148 +POSTHOOK: Input: default@t2_n87 +POSTHOOK: Input: default@t3_n35 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization operator +select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + 
+PREHOOK: query: explain vectorization operator
+select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization operator
+select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Map Join Operator
+                    condition map:
+                         Left Outer Join 0 to 1
+                    keys:
+                      0 key (type: int)
+                      1 key (type: int)
+                    Map Join Vectorization:
+                        className: VectorMapJoinOuterLongOperator
+                        native: true
+                        nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+                    outputColumnNames: _col0, _col5
+                    input vertices:
+                      1 Map 3
+                    Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                    Filter Operator
+                      Filter Vectorization:
+                          className: VectorFilterOperator
+                          native: true
+                      predicate: _col5 is not null (type: boolean)
+                      Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Left Semi Join 0 to 1
+                        keys:
+                          0 _col5 (type: int)
+                          1 _col0 (type: int)
+                        Map Join Vectorization:
+                            className: VectorMapJoinLeftSemiLongOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                        outputColumnNames: _col0
+                        input vertices:
+                          1 Map 4
+                        Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkObjectHashOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Map 3
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkLongOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 4
             Map Operator Tree:
                 TableScan
@@ -13201,12 +15494,10 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -13215,16 +15506,13 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         Group By Vectorization:
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 0:int
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: []
                         keys: _col0 (type: int)
                         mode: hash
                         outputColumnNames: _col0
@@ -13235,10 +15523,8 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: int)
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkLongOperator
-                              keyColumnNums: [0]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: []
                           Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -13251,27 +15537,14 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
@@ -13279,14 +15552,13 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                   table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -13298,13 +15570,13 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key
+PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1_n148
 PREHOOK: Input: default@t2_n87
 PREHOOK: Input: default@t3_n35
 #### A masked pattern was here ####
-POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t2_n87
@@ -13316,6 +15588,18 @@ POSTHOOK: Input: default@t3_n35
 0
 0
 0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
 10
 10
 10
@@ -13324,11 +15608,11 @@ POSTHOOK: Input: default@t3_n35
 4
 8
 8
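(Editorial sketch, not part of the golden output: in the plan above the optimizer runs the LEFT OUTER JOIN first and then filters on _col5 is not null before the semi join, because a semi join on b.key can only match non-null keys, so outer-extended rows are dropped first. The same semantics in plain HiveQL:)

    select a.key
    from t3_n35 a
    left outer join t1_n148 b on a.key = b.key
    where b.key in (select c.key from t2_n87 c)
    sort by a.key;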
-PREHOOK: query: explain vectorization detail
-select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
+PREHOOK: query: explain vectorization operator
+select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization operator
+select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -13343,48 +15627,26 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
-                  Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Map Join Operator
-                    condition map:
-                         Left Outer Join 0 to 1
-                         Left Semi Join 1 to 2
-                    keys:
-                      0 key (type: int)
-                      1 key (type: int)
-                      2 _col0 (type: int)
-                    Map Join Vectorization:
-                        bigTableKeyExpressions: col 0:int
-                        bigTableValueExpressions: col 0:int
-                        className: VectorMapJoinOperator
-                        native: false
-                        nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
-                        nativeConditionsNotMet: One MapJoin Condition IS false
-                    outputColumnNames: _col0
-                    input vertices:
-                      1 Map 3
-                      2 Map 4
-                    Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Reduce Sink Vectorization:
-                          className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [0]
-                          native: true
-                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: []
-                      Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkLongOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -13393,34 +15655,25 @@ STAGE PLANS:
                 inputFormatFeatureSupport: [DECIMAL_64]
                 featureSupportInUse: [DECIMAL_64]
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
+                allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Map 3
+        Map 4
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
-                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -13432,51 +15685,46 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Map 4
+        Map 5
             Map Operator Tree:
                 TableScan
                   alias: c
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        projectedOutputColumnNums: [0]
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:int
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: _col0 (type: int)
-                      mode: hash
+                    Select Operator
+                      expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
                         Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -13488,27 +15736,45 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 key (type: int)
+                  1 key (type: int)
+                outputColumnNames: _col0, _col5
+                Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: _col5 is not null (type: boolean)
+                  Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Left Semi Join 0 to 1
+                    keys:
+                      0 _col5 (type: int)
+                      1 _col0 (type: int)
+                    outputColumnNames: _col0
+                    input vertices:
+                      1 Map 5
+                    Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
+        Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
@@ -13516,14 +15782,13 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -13535,13 +15800,13 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
+PREHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1_n148
 PREHOOK: Input: default@t2_n87
 PREHOOK: Input: default@t3_n35
 #### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t2_n87
@@ -13573,10 +15838,13 @@ POSTHOOK: Input: default@t3_n35
 4
 8
 8
-PREHOOK: query: explain vectorization detail
+NULL
+NULL
+NULL
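(Editorial sketch, not part of the golden output: the three NULL rows added to the expected results above are FULL OUTER null-extension, i.e. t3_n35 keys with no matching t1_n148 key leave a.key NULL. Those rows can be isolated with:)

    select a.key, b.key
    from t1_n148 a
    full outer join t3_n35 b on a.key = b.key
    where a.key is null;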
+PREHOOK: query: explain vectorization operator
 select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -13592,7 +15860,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -13603,17 +15871,14 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                        native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -13626,12 +15891,6 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 4
             Map Operator Tree:
                 TableScan
@@ -13639,17 +15898,14 @@ STAGE PLANS:
                   alias: b
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -13662,51 +15918,46 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 5
             Map Operator Tree:
                 TableScan
                   alias: c
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        projectedOutputColumnNums: [0]
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:int
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: _col0 (type: int)
-                      mode: hash
+                    Select Operator
+                      expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
                         Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -13718,44 +15969,45 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Outer Join 0 to 1
-                     Left Semi Join 1 to 2
+                     Full Outer Join 0 to 1
                 keys:
                   0 key (type: int)
                   1 key (type: int)
-                  2 _col0 (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                outputColumnNames: _col0, _col5
+                Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: _col5 is not null (type: boolean)
+                  Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Left Semi Join 0 to 1
+                    keys:
+                      0 _col5 (type: int)
+                      1 _col0 (type: int)
+                    outputColumnNames: _col0
+                    input vertices:
+                      1 Map 5
+                    Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
@@ -13763,14 +16015,13 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -13823,10 +16074,10 @@ POSTHOOK: Input: default@t3_n35
 NULL
 NULL
 NULL
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -13842,29 +16093,60 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                 TableScan
                   alias: a
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
-                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Semi Join 0 to 1
+                      keys:
+                        0 key (type: int)
+                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Left Outer Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 key (type: int)
+                        Map Join Vectorization:
+                            className: VectorMapJoinOuterLongOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+                        outputColumnNames: _col0
+                        input vertices:
+                          1 Map 4
+                        Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkObjectHashOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -13876,51 +16158,46 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Map 4
+        Map 3
             Map Operator Tree:
                 TableScan
                   alias: b
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                        native: true
-                        projectedOutputColumnNums: [0]
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:int
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: _col0 (type: int)
-                      mode: hash
+                    Select Operator
+                      expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
                         Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -13932,30 +16209,21 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-        Map 5
+        Map 4
             Map Operator Tree:
                 TableScan
                   alias: c
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -13968,44 +16236,14 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                     Left Outer Join 0 to 2
-                keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
-                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
              Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
@@ -14013,14 +16251,13 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -14073,11 +16310,11 @@ POSTHOOK: Input: default@t3_n35
 4
 8
 8
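(Editorial sketch, not part of the golden output: the chained native map joins above, VectorMapJoinLeftSemiLongOperator feeding VectorMapJoinOuterLongOperator, depend on the session settings named in the plan's nativeConditionsMet lines. A minimal reproduction script, with setting names taken from that output and values assumed:)

    set hive.mapjoin.optimized.hashtable=true;
    set hive.vectorized.execution.mapjoin.native.enabled=true;
    set hive.vectorized.execution.reducesink.new.enabled=true;
    explain vectorization operator
    select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key;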
-PREHOOK: query: explain vectorization detail
-select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+PREHOOK: query: explain vectorization operator
+select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+POSTHOOK: query: explain vectorization operator
+select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -14092,7 +16329,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Map 1 <- Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -14100,21 +16338,39 @@ STAGE PLANS:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
-                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Semi Join 0 to 1
+                      keys:
+                        0 key (type: int)
+                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 4
+                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -14126,51 +16382,46 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 4
             Map Operator Tree:
                 TableScan
                   alias: b
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                        native: true
-                        projectedOutputColumnNums: [0]
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:int
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: _col0 (type: int)
-                      mode: hash
+                    Select Operator
+                      expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
                         Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -14182,12 +16433,6 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 5
             Map Operator Tree:
                 TableScan
@@ -14195,17 +16440,14 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -14218,44 +16460,32 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Left Semi Join 0 to 1
-                     Right Outer Join 0 to 2
+                     Full Outer Join 0 to 1
                 keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
+                  0 _col0 (type: int)
+                  1 key (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
@@ -14263,14 +16493,13 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -14282,13 +16511,13 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+PREHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1_n148
 PREHOOK: Input: default@t2_n87
 PREHOOK: Input: default@t3_n35
 #### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t2_n87
@@ -14316,19 +16545,30 @@ POSTHOOK: Input: default@t3_n35
 10
 10
 10
+10
+10
+10
+10
+10
+10
+10
+10
+2
 4
 4
+5
+5
+5
 8
 8
+9
 NULL
 NULL
 NULL
-NULL
-NULL
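(Editorial note with sketch, not part of the golden output: when the FULL OUTER JOIN stays a reduce-side Merge Join, as in the plans above, the join vertex reports enableConditionsNotMet: Vectorizing MergeJoin Supported IS false while the surrounding map and reduce work still vectorizes. The per-operator status can be probed with the same explain form used throughout this file:)

    explain vectorization operator
    select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key;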
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -14344,7 +16584,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Map 1 <- Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -14352,21 +16593,39 @@ STAGE PLANS:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: a
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
-                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Semi Join 0 to 1
+                      keys:
+                        0 key (type: int)
+                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinLeftSemiLongOperator
+                          native: true
+                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 4
+                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -14378,51 +16637,46 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 4
             Map Operator Tree:
                 TableScan
                   alias: b
+                  filterExpr: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
-                  Select Operator
-                    expressions: key (type: int)
-                    outputColumnNames: _col0
-                    Select Vectorization:
-                        className: VectorSelectOperator
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
                         native: true
-                        projectedOutputColumnNums: [0]
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:int
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
-                      keys: _col0 (type: int)
-                      mode: hash
+                    Select Operator
+                      expressions: key (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
                       Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
                         Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkLongOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -14434,12 +16688,6 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 5
             Map Operator Tree:
                 TableScan
@@ -14447,17 +16695,14 @@ STAGE PLANS:
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Reduce Output Operator
                     key expressions: key (type: int)
                     sort order: +
                     Map-reduce partition columns: key (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                        native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -14470,44 +16715,32 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Reducer 2
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Left Semi Join 0 to 1
-                     Outer Join 0 to 2
+                     Full Outer Join 0 to 1
                 keys:
-                  0 key (type: int)
-                  1 _col0 (type: int)
-                  2 key (type: int)
+                  0 _col0 (type: int)
+                  1 key (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
@@ -14515,14 +16748,13 @@ STAGE PLANS:
                 Select Vectorization:
                     className: VectorSelectOperator
                     native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -14588,10 +16820,10 @@ POSTHOOK: Input: default@t3_n35
 NULL
 NULL
 NULL
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -14619,12 +16851,10 @@ STAGE PLANS:
                   Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -14634,13 +16864,9 @@ STAGE PLANS:
                         0 key (type: int)
                         1 _col0 (type: int)
                       Map Join Vectorization:
-                          bigTableKeyColumnNums: [0]
-                          bigTableRetainedColumnNums: [0, 1]
-                          bigTableValueColumnNums: [0, 1]
                           className: VectorMapJoinLeftSemiLongOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                          projectedOutputColumnNums: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
@@ -14652,13 +16878,9 @@ STAGE PLANS:
                         0 _col1 (type: string)
                         1 value (type: string)
                       Map Join Vectorization:
-                          bigTableKeyColumnNums: [1]
-                          bigTableRetainedColumnNums: [0]
-                          bigTableValueColumnNums: [0]
                           className: VectorMapJoinOuterStringOperator
                           native: true
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -14619,12 +16851,10 @@ STAGE PLANS:
                 Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: SelectColumnIsNotNull(col 0:int)
                   predicate: key is not null (type: boolean)
                   Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
@@ -14634,13 +16864,9 @@ STAGE PLANS:
                       0 key (type: int)
                       1 _col0 (type: int)
                     Map Join Vectorization:
-                        bigTableKeyColumnNums: [0]
-                        bigTableRetainedColumnNums: [0, 1]
-                        bigTableValueColumnNums: [0, 1]
                         className: VectorMapJoinLeftSemiLongOperator
                         native: true
                         nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                        projectedOutputColumnNums: [0, 1]
                     outputColumnNames: _col0, _col1
                     input vertices:
                       1 Map 3
@@ -14652,13 +16878,9 @@ STAGE PLANS:
                       0 _col1 (type: string)
                       1 value (type: string)
                     Map Join Vectorization:
-                        bigTableKeyColumnNums: [1]
-                        bigTableRetainedColumnNums: [0]
-                        bigTableValueColumnNums: [0]
                         className: VectorMapJoinOuterStringOperator
                         native: true
                         nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
-                        projectedOutputColumnNums: [0]
                     outputColumnNames: _col0
                     input vertices:
                       1 Map 4
@@ -14668,10 +16890,8 @@ STAGE PLANS:
                     sort order: +
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkObjectHashOperator
-                        keyColumnNums: [0]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE
           Execution mode: vectorized, llap
           LLAP IO: all inputs
@@ -14684,12 +16904,6 @@ STAGE PLANS:
               allNative: true
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0, 1]
-                  dataColumns: key:int, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
         Map 3 
           Map Operator Tree:
              TableScan
@@ -14698,12 +16912,10 @@ STAGE PLANS:
                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                TableScan Vectorization:
                    native: true
-                   vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                Filter Operator
                  Filter Vectorization:
                      className: VectorFilterOperator
                      native: true
-                     predicateExpression: SelectColumnIsNotNull(col 0:int)
                  predicate: key is not null (type: boolean)
                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
@@ -14712,16 +16924,13 @@ STAGE PLANS:
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
-                       projectedOutputColumnNums: [0]
                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      Group By Vectorization:
                          className: VectorGroupByOperator
                          groupByMode: HASH
-                         keyExpressions: col 0:int
                          native: false
                          vectorProcessingMode: HASH
-                         projectedOutputColumnNums: []
                      keys: _col0 (type: int)
                      mode: hash
                      outputColumnNames: _col0
@@ -14732,10 +16941,8 @@ STAGE PLANS:
                        Map-reduce partition columns: _col0 (type: int)
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkLongOperator
-                           keyColumnNums: [0]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                        Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
           Execution mode: vectorized, llap
           LLAP IO: all inputs
@@ -14748,12 +16955,6 @@ STAGE PLANS:
               allNative: false
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0]
-                  dataColumns: key:int, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
         Map 4 
          Map Operator Tree:
             TableScan
@@ -14761,17 +16962,14 @@ STAGE PLANS:
               Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
-                  vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
               Reduce Output Operator
                 key expressions: value (type: string)
                 sort order: +
                 Map-reduce partition columns: value (type: string)
                 Reduce Sink Vectorization:
                     className: VectorReduceSinkStringOperator
-                    keyColumnNums: [1]
                    native: true
                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                   valueColumnNums: []
                Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -14784,27 +16982,14 @@ STAGE PLANS:
             allNative: true
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [1]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Reducer 2 
         Execution mode: vectorized, llap
         Reduce Vectorization:
             enabled: true
             enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-            reduceColumnNullOrder: a
-            reduceColumnSortOrder: +
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 1
-               dataColumns: KEY.reducesinkkey0:int
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
        Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: int)
@@ -14812,7 +16997,6 @@ STAGE PLANS:
            Select Vectorization:
                className: VectorSelectOperator
                native: true
-               projectedOutputColumnNums: [0]
            Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
@@ -14878,10 +17062,10 @@ POSTHOOK: Input: default@t3_n35
 4
 8
 8
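Reviewer note: throughout this file the golden output moves from `explain vectorization detail` to `explain vectorization operator`. The OPERATOR level keeps per-operator class names and native-condition lines, while the DETAIL level additionally prints column-index annotations (keyColumnNums, projectedOutputColumnNums, rowBatchContext, and so on) — exactly the lines removed in these hunks. Using the query above:

-- OPERATOR level: per-operator vectorization summary (used in this file).
EXPLAIN VECTORIZATION OPERATOR
SELECT a.key FROM t3_n35 a
LEFT SEMI JOIN t2_n87 b ON a.key = b.key
LEFT OUTER JOIN t1_n148 c ON a.value = c.value
SORT BY a.key;

-- DETAIL level: adds column numbers and rowBatchContext schemas.
EXPLAIN VECTORIZATION DETAIL
SELECT a.key FROM t3_n35 a
LEFT SEMI JOIN t2_n87 b ON a.key = b.key
LEFT OUTER JOIN t1_n148 c ON a.value = c.value
SORT BY a.key;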
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -14908,12 +17092,10 @@ STAGE PLANS:
                 Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 100), SelectColumnIsNotNull(col 1:string))
                   predicate: ((key > 100) and value is not null) (type: boolean)
                   Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
@@ -14922,7 +17104,6 @@ STAGE PLANS:
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
-                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
@@ -14931,13 +17112,9 @@ STAGE PLANS:
                         0 _col1 (type: string)
                         1 _col0 (type: string)
                       Map Join Vectorization:
-                          bigTableKeyColumnNums: [1]
-                          bigTableRetainedColumnNums: [0]
-                          bigTableValueColumnNums: [0]
                          className: VectorMapJoinLeftSemiStringOperator
                          native: true
                          nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                          projectedOutputColumnNums: [0]
                      outputColumnNames: _col0
                      input vertices:
                        1 Map 2
@@ -14963,12 +17140,6 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [0, 1]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Map 2 
          Map Operator Tree:
             TableScan
@@ -14977,12 +17148,10 @@ STAGE PLANS:
               Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
-                  vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
               Filter Operator
                 Filter Vectorization:
                     className: VectorFilterOperator
                     native: true
-                    predicateExpression: SelectColumnIsNotNull(col 1:string)
                 predicate: value is not null (type: boolean)
                 Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
@@ -14991,16 +17160,13 @@ STAGE PLANS:
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [1]
                   Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     Group By Vectorization:
                         className: VectorGroupByOperator
                         groupByMode: HASH
-                        keyExpressions: col 1:string
                        native: false
                        vectorProcessingMode: HASH
-                        projectedOutputColumnNums: []
                    keys: _col0 (type: string)
                    mode: hash
                    outputColumnNames: _col0
@@ -15011,10 +17177,8 @@ STAGE PLANS:
                      Map-reduce partition columns: _col0 (type: string)
                      Reduce Sink Vectorization:
                          className: VectorReduceSinkStringOperator
-                         keyColumnNums: [0]
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                         valueColumnNums: []
                      Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -15027,12 +17191,6 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [1]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
   Stage: Stage-0
     Fetch Operator
@@ -15050,10 +17208,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2_n87
 POSTHOOK: Input: default@t3_n35
 #### A masked pattern was here ####
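Reviewer note: the empty (masked) result above follows from LEFT SEMI JOIN semantics — it keeps only left-table rows with at least one match and never duplicates them, which is why the plans build a deduplicating Group By Operator on the small side. A rough IN-subquery equivalent of the query above (same semantics, not necessarily the same plan):

-- Hedged sketch: LEFT SEMI JOIN behaves like an IN/EXISTS filter.
SELECT a.key
FROM t3_n35 a
WHERE a.key > 100
  AND a.value IN (SELECT b.value FROM t2_n87 b);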
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -15081,12 +17239,10 @@ STAGE PLANS:
                 Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: SelectColumnIsNotNull(col 0:int)
                   predicate: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
@@ -15096,26 +17252,21 @@ STAGE PLANS:
                       0 key (type: int)
                       1 _col0 (type: int)
                     Map Join Vectorization:
-                        bigTableKeyColumnNums: [0]
-                        bigTableRetainedColumnNums: [0, 1]
-                        bigTableValueColumnNums: [0, 1]
                         className: VectorMapJoinLeftSemiLongOperator
                         native: true
                         nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                        projectedOutputColumnNums: [0, 1]
                     outputColumnNames: _col0, _col1
                     input vertices:
                       1 Map 3
                     Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
                     Reduce Output Operator
                       key expressions: _col0 (type: int), _col1 (type: string)
                       sort order: ++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [0, 1]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: []
                       Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
           Execution mode: vectorized, llap
           LLAP IO: all inputs
@@ -15128,12 +17279,6 @@ STAGE PLANS:
               allNative: true
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0, 1]
-                  dataColumns: key:int, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
         Map 3 
          Map Operator Tree:
             TableScan
@@ -15142,12 +17287,10 @@ STAGE PLANS:
               Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
               TableScan Vectorization:
                   native: true
-                  vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
               Filter Operator
                 Filter Vectorization:
                     className: VectorFilterOperator
                     native: true
-                    predicateExpression: SelectColumnIsNotNull(col 0:int)
                 predicate: key is not null (type: boolean)
                 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
@@ -15156,16 +17299,13 @@ STAGE PLANS:
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [0]
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     Group By Vectorization:
                         className: VectorGroupByOperator
                         groupByMode: HASH
-                        keyExpressions: col 0:int
                        native: false
                        vectorProcessingMode: HASH
-                        projectedOutputColumnNums: []
                    keys: _col0 (type: int)
                    mode: hash
                    outputColumnNames: _col0
@@ -15176,10 +17316,8 @@ STAGE PLANS:
                      Map-reduce partition columns: _col0 (type: int)
                      Reduce Sink Vectorization:
                          className: VectorReduceSinkLongOperator
-                         keyColumnNums: [0]
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                         valueColumnNums: []
                      Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -15192,27 +17330,14 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [0]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Reducer 2 
         Execution mode: vectorized, llap
         Reduce Vectorization:
             enabled: true
             enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-            reduceColumnNullOrder: aa
-            reduceColumnSortOrder: ++
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
        Reduce Operator Tree:
          Select Operator
            expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -15220,7 +17345,6 @@ STAGE PLANS:
            Select Vectorization:
                className: VectorSelectOperator
                native: true
-               projectedOutputColumnNums: [0, 1]
            Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
@@ -15255,10 +17379,10 @@ POSTHOOK: Input: default@t2_n87
 10 val_10
 4 val_4
 8 val_8
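Reviewer note: the map-join plans in this file now carry a `HybridGraceHashJoin: true` marker, meaning the hash table can spill partitions to disk instead of failing on memory pressure. A sketch of how that is typically toggled — the property name is from Hive's hybrid grace hash join feature and its default may vary across versions, so treat it as an assumption:

-- Assumed config knob behind the HybridGraceHashJoin marker above.
SET hive.mapjoin.hybridgrace.hashtable=true;
EXPLAIN VECTORIZATION OPERATOR
SELECT * FROM t1_n148 a LEFT SEMI JOIN t2_n87 b ON a.key = b.key
SORT BY a.key, a.value;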
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join t1_n148 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -15286,12 +17410,10 @@ STAGE PLANS:
                 Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: SelectColumnIsNotNull(col 0:int)
                   predicate: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
@@ -15301,26 +17423,21 @@ STAGE PLANS:
                       0 key (type: int)
                       1 _col0 (type: int)
                     Map Join Vectorization:
-                        bigTableKeyColumnNums: [0]
-                        bigTableRetainedColumnNums: [0, 1]
-                        bigTableValueColumnNums: [0, 1]
                         className: VectorMapJoinLeftSemiLongOperator
                         native: true
                         nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                        projectedOutputColumnNums: [0, 1]
                     outputColumnNames: _col0, _col1
                     input vertices:
                       1 Map 3
                     Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
                     Reduce Output Operator
                       key expressions: _col0 (type: int), _col1 (type: string)
                       sort order: ++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [0, 1]
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: []
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -15333,12 +17450,6 @@ STAGE PLANS:
             allNative: true
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [0, 1]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Map 3 
         Map Operator Tree:
            TableScan
@@ -15347,12 +17458,10 @@ STAGE PLANS:
              Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
-                 vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
              Filter Operator
                Filter Vectorization:
                    className: VectorFilterOperator
                    native: true
-                   predicateExpression: SelectColumnIsNotNull(col 0:int)
                predicate: key is not null (type: boolean)
                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                Select Operator
@@ -15361,16 +17470,13 @@ STAGE PLANS:
                  Select Vectorization:
                      className: VectorSelectOperator
                      native: true
-                     projectedOutputColumnNums: [0]
                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    Group By Vectorization:
                        className: VectorGroupByOperator
                        groupByMode: HASH
-                       keyExpressions: col 0:int
                       native: false
                       vectorProcessingMode: HASH
-                       projectedOutputColumnNums: []
                   keys: _col0 (type: int)
                   mode: hash
                   outputColumnNames: _col0
@@ -15381,10 +17487,8 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
        Execution mode: vectorized, llap
        LLAP IO: all inputs
@@ -15397,27 +17501,14 @@ STAGE PLANS:
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [0]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
       Reducer 2 
        Execution mode: vectorized, llap
        Reduce Vectorization:
            enabled: true
            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-           reduceColumnNullOrder: aa
-           reduceColumnSortOrder: ++
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
-          rowBatchContext:
-              dataColumnCount: 2
-              dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -15425,7 +17516,6 @@ STAGE PLANS:
           Select Vectorization:
               className: VectorSelectOperator
               native: true
-              projectedOutputColumnNums: [0, 1]
           Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
@@ -15462,10 +17552,10 @@ POSTHOOK: Input: default@t2_n87
 10 val_5
 4 val_2
 8 val_4
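Reviewer note: compare the two result blocks above — a LEFT SEMI JOIN projects columns of the left table only, so swapping the operands changes which rows survive even though the matching key set is identical (SORT BY omitted here for brevity):

SELECT * FROM t1_n148 a LEFT SEMI JOIN t2_n87 b ON a.key = b.key;   -- rows come from t1_n148
SELECT * FROM t2_n87 a LEFT SEMI JOIN t1_n148 b ON b.key = a.key;   -- rows come from t2_n87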
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t4_n19 b on b.key=a.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -15493,12 +17583,10 @@ STAGE PLANS:
                 Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: SelectColumnIsNotNull(col 0:int)
                   predicate: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
@@ -15508,26 +17596,21 @@ STAGE PLANS:
                       0 key (type: int)
                       1 _col0 (type: int)
                     Map Join Vectorization:
-                        bigTableKeyColumnNums: [0]
-                        bigTableRetainedColumnNums: [0, 1]
-                        bigTableValueColumnNums: [0, 1]
                         className: VectorMapJoinLeftSemiLongOperator
                         native: true
                         nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                        projectedOutputColumnNums: [0, 1]
                     outputColumnNames: _col0, _col1
                     input vertices:
                       1 Map 3
                     Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
                     Reduce Output Operator
                       key expressions: _col0 (type: int), _col1 (type: string)
                       sort order: ++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [0, 1]
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: []
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -15540,12 +17623,6 @@ STAGE PLANS:
             allNative: true
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [0, 1]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Map 3 
         Map Operator Tree:
            TableScan
@@ -15554,12 +17631,10 @@ STAGE PLANS:
              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
-                 vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
              Filter Operator
                Filter Vectorization:
                    className: VectorFilterOperator
                    native: true
-                   predicateExpression: SelectColumnIsNotNull(col 0:int)
                predicate: key is not null (type: boolean)
                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                Select Operator
@@ -15568,16 +17643,13 @@ STAGE PLANS:
                  Select Vectorization:
                      className: VectorSelectOperator
                      native: true
-                     projectedOutputColumnNums: [0]
                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    Group By Vectorization:
                        className: VectorGroupByOperator
                        groupByMode: HASH
-                       keyExpressions: col 0:int
                       native: false
                       vectorProcessingMode: HASH
-                       projectedOutputColumnNums: []
                   keys: _col0 (type: int)
                   mode: hash
                   outputColumnNames: _col0
@@ -15588,10 +17660,8 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
        Execution mode: vectorized, llap
        LLAP IO: all inputs
@@ -15604,27 +17674,14 @@ STAGE PLANS:
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [0]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
       Reducer 2 
        Execution mode: vectorized, llap
        Reduce Vectorization:
            enabled: true
            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-           reduceColumnNullOrder: aa
-           reduceColumnSortOrder: ++
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
-          rowBatchContext:
-              dataColumnCount: 2
-              dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -15632,7 +17689,6 @@ STAGE PLANS:
           Select Vectorization:
               className: VectorSelectOperator
               native: true
-              projectedOutputColumnNums: [0, 1]
           Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
@@ -15661,10 +17717,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t4_n19
 #### A masked pattern was here ####
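Reviewer note: t4_n19 is estimated above at a single row (Num rows: 1 Data size: 4), which is exactly the case size-based map-join conversion targets — the small side is broadcast and built into a hash table. A sketch of the usual settings (both properties exist in Hive, but the defaults and thresholds here are assumptions, not taken from this file):

-- Assumed/typical settings behind the automatic map-join plans above.
SET hive.auto.convert.join=true;
SET hive.auto.convert.join.noconditionaltask.size=10000000;
SELECT * FROM t1_n148 a LEFT SEMI JOIN t4_n19 b ON b.key = a.key
SORT BY a.key, a.value;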
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join t3_n35 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -15692,12 +17748,10 @@ STAGE PLANS:
                 Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: SelectColumnIsNotNull(col 0:int)
                   predicate: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
@@ -15707,34 +17761,28 @@ STAGE PLANS:
                       0 key (type: int)
                       1 _col1 (type: int)
                     Map Join Vectorization:
-                        bigTableKeyColumnNums: [0]
-                        bigTableRetainedColumnNums: [1]
-                        bigTableValueColumnNums: [1]
                         className: VectorMapJoinLeftSemiLongOperator
                         native: true
                         nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                        projectedOutputColumnNums: [1]
                     outputColumnNames: _col1
                     input vertices:
                       1 Map 3
                     Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
                     Select Operator
                       expressions: _col1 (type: string)
                       outputColumnNames: _col0
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [1]
                       Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [1]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
                        Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -15747,12 +17795,6 @@ STAGE PLANS:
             allNative: true
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [0, 1]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Map 3 
         Map Operator Tree:
            TableScan
@@ -15761,12 +17803,10 @@ STAGE PLANS:
              Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
-                 vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
              Filter Operator
                Filter Vectorization:
                    className: VectorFilterOperator
                    native: true
-                   predicateExpression: FilterLongColLessLongScalar(col 0:int, val 15)
                predicate: (key < 15) (type: boolean)
                Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                Select Operator
@@ -15775,16 +17815,13 @@ STAGE PLANS:
                  Select Vectorization:
                      className: VectorSelectOperator
                      native: true
-                     projectedOutputColumnNums: [0]
                  Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    Group By Vectorization:
                        className: VectorGroupByOperator
                        groupByMode: HASH
-                       keyExpressions: col 0:int, col 0:int
                       native: false
                       vectorProcessingMode: HASH
-                       projectedOutputColumnNums: []
                   keys: _col1 (type: int), _col1 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1
@@ -15795,10 +17832,8 @@ STAGE PLANS:
                     Map-reduce partition columns: _col1 (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [1]
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                    Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
        Execution mode: vectorized, llap
        LLAP IO: all inputs
@@ -15811,27 +17846,14 @@ STAGE PLANS:
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [0]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
       Reducer 2 
        Execution mode: vectorized, llap
        Reduce Vectorization:
            enabled: true
            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-           reduceColumnNullOrder: a
-           reduceColumnSortOrder: +
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
-          rowBatchContext:
-              dataColumnCount: 1
-              dataColumns: KEY.reducesinkkey0:string
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -15839,7 +17861,6 @@ STAGE PLANS:
           Select Vectorization:
               className: VectorSelectOperator
               native: true
-              projectedOutputColumnNums: [0]
           Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
@@ -15879,10 +17900,10 @@ val_5
 val_5
 val_8
 val_9
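Reviewer note: the build-side Filter above shows `predicate: (key < 15)` — the string literal '15' from the ON clause is folded into an integer comparison against the int key column before the semi join. Writing the literal as an int makes the same plan explicit:

-- Equivalent spelling of the query above without the implicit cast.
SELECT a.value
FROM t1_n148 a
LEFT SEMI JOIN t3_n35 b ON b.key = a.key AND b.key < 15
SORT BY a.value;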
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t1_n148 a left semi join t2_n87 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -15910,12 +17931,10 @@ STAGE PLANS:
                 Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: SelectColumnIsNotNull(col 0:int)
                   predicate: key is not null (type: boolean)
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
@@ -15925,26 +17944,21 @@ STAGE PLANS:
                       0 key (type: int)
                       1 _col0 (type: int)
                     Map Join Vectorization:
-                        bigTableKeyColumnNums: [0]
-                        bigTableRetainedColumnNums: [0, 1]
-                        bigTableValueColumnNums: [0, 1]
                         className: VectorMapJoinLeftSemiLongOperator
                         native: true
                         nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                        projectedOutputColumnNums: [0, 1]
                     outputColumnNames: _col0, _col1
                     input vertices:
                       1 Map 3
                     Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
                     Reduce Output Operator
                       key expressions: _col0 (type: int), _col1 (type: string)
                       sort order: ++
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [0, 1]
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: []
                      Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -15957,12 +17971,6 @@ STAGE PLANS:
             allNative: true
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [0, 1]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Map 3 
         Map Operator Tree:
            TableScan
@@ -15971,12 +17979,10 @@ STAGE PLANS:
              Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
-                 vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
              Filter Operator
                Filter Vectorization:
                    className: VectorFilterOperator
                    native: true
-                   predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1:string, val val_10), SelectColumnIsNotNull(col 0:int))
                predicate: ((value < 'val_10') and key is not null) (type: boolean)
                Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
                Select Operator
@@ -15985,16 +17991,13 @@ STAGE PLANS:
                  Select Vectorization:
                      className: VectorSelectOperator
                      native: true
-                     projectedOutputColumnNums: [0, 1]
                  Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    Group By Vectorization:
                        className: VectorGroupByOperator
                        groupByMode: HASH
-                       keyExpressions: col 0:int, col 1:string
                       native: false
                       vectorProcessingMode: HASH
-                       projectedOutputColumnNums: []
                   keys: _col0 (type: int), _col1 (type: string)
                   mode: hash
                   outputColumnNames: _col0, _col1
@@ -16005,10 +18008,8 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                    Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
        Execution mode: vectorized, llap
        LLAP IO: all inputs
@@ -16021,27 +18022,14 @@ STAGE PLANS:
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [0, 1]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
       Reducer 2 
        Execution mode: vectorized, llap
        Reduce Vectorization:
            enabled: true
            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-           reduceColumnNullOrder: aa
-           reduceColumnSortOrder: ++
           allNative: false
           usesVectorUDFAdaptor: false
          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 2
-              dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -16049,7 +18037,6 @@ STAGE PLANS:
           Select Vectorization:
               className: VectorSelectOperator
               native: true
-              projectedOutputColumnNums: [0, 1]
           Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
@@ -16081,10 +18068,10 @@ POSTHOOK: Input: default@t2_n87
 0 val_0
 0 val_0
 0 val_0
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join (select key from t3_n35 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -16112,12 +18099,10 @@ STAGE PLANS:
                 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 5)
                   predicate: (key > 5) (type: boolean)
                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
@@ -16126,16 +18111,13 @@ STAGE PLANS:
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
-                        projectedOutputColumnNums: [0]
                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       Group By Vectorization:
                           className: VectorGroupByOperator
                           groupByMode: HASH
-                          keyExpressions: col 0:int
                          native: false
                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
                      keys: _col0 (type: int)
                      mode: hash
                      outputColumnNames: _col0
@@ -16146,10 +18128,8 @@ STAGE PLANS:
                        Map-reduce partition columns: _col0 (type: int)
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkLongOperator
-                           keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -16162,12 +18142,6 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [0]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Map 2 
         Map Operator Tree:
            TableScan
@@ -16176,12 +18150,10 @@ STAGE PLANS:
              Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
-                 vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
              Filter Operator
                Filter Vectorization:
                    className: VectorFilterOperator
                    native: true
-                   predicateExpression: SelectColumnIsNotNull(col 0:int)
                predicate: key is not null (type: boolean)
                Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
@@ -16191,34 +18163,28 @@ STAGE PLANS:
                    0 key (type: int)
                    1 _col0 (type: int)
                  Map Join Vectorization:
-                      bigTableKeyColumnNums: [0]
-                      bigTableRetainedColumnNums: [1]
-                      bigTableValueColumnNums: [1]
                      className: VectorMapJoinLeftSemiLongOperator
                      native: true
                      nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                      projectedOutputColumnNums: [1]
                  outputColumnNames: _col1
                  input vertices:
                    1 Map 1
                  Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                 HybridGraceHashJoin: true
                  Select Operator
                    expressions: _col1 (type: string)
                    outputColumnNames: _col0
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
-                       projectedOutputColumnNums: [1]
                    Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
                      Reduce Sink Vectorization:
                          className: VectorReduceSinkObjectHashOperator
-                         keyColumnNums: [1]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                         valueColumnNums: []
                     Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
        Execution mode: vectorized, llap
        LLAP IO: all inputs
@@ -16231,27 +18197,14 @@ STAGE PLANS:
            allNative: true
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [0, 1]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
       Reducer 3 
        Execution mode: vectorized, llap
        Reduce Vectorization:
            enabled: true
            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-           reduceColumnNullOrder: a
-           reduceColumnSortOrder: +
           allNative: false
           usesVectorUDFAdaptor: false
          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 1
-              dataColumns: KEY.reducesinkkey0:string
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -16259,7 +18212,6 @@ STAGE PLANS:
           Select Vectorization:
               className: VectorSelectOperator
               native: true
-              projectedOutputColumnNums: [0]
           Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
@@ -16291,10 +18243,10 @@ POSTHOOK: Input: default@t3_n35
 val_10
 val_8
 val_9
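Reviewer note: filtering inside a derived table, as in the query above, plans the same way as an ON-clause filter — a Filter Operator followed by the deduplicating Group By on the build side. The IN-subquery form is again a rough semantic equivalent:

-- Hedged sketch of the same semantics via IN.
SELECT a.value
FROM t1_n148 a
WHERE a.key IN (SELECT key FROM t3_n35 WHERE key > 5)
SORT BY a.value;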
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select a.value from t1_n148 a left semi join (select key , value from t2_n87 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -16322,12 +18274,10 @@ STAGE PLANS:
                 Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 5), FilterStringGroupColLessEqualStringScalar(col 1:string, val val_20))
                   predicate: ((key > 5) and (value <= 'val_20')) (type: boolean)
                   Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
@@ -16336,16 +18286,13 @@ STAGE PLANS:
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
-                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       Group By Vectorization:
                           className: VectorGroupByOperator
                           groupByMode: HASH
-                          keyExpressions: col 0:int, col 1:string
                          native: false
                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
                      keys: _col0 (type: int), _col1 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
@@ -16356,10 +18303,8 @@ STAGE PLANS:
                        Map-reduce partition columns: _col0 (type: int)
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkLongOperator
-                           keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                       Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -16372,12 +18317,6 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [0, 1]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Map 2 
         Map Operator Tree:
            TableScan
@@ -16386,12 +18325,10 @@ STAGE PLANS:
              Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
-                 vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
              Filter Operator
                Filter Vectorization:
                    className: VectorFilterOperator
                    native: true
-                   predicateExpression: SelectColumnIsNotNull(col 0:int)
                predicate: key is not null (type: boolean)
                Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
@@ -16401,34 +18338,28 @@ STAGE PLANS:
                    0 key (type: int)
                    1 _col0 (type: int)
                  Map Join Vectorization:
-                      bigTableKeyColumnNums: [0]
-                      bigTableRetainedColumnNums: [1]
-                      bigTableValueColumnNums: [1]
                      className: VectorMapJoinLeftSemiLongOperator
                      native: true
                      nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                      projectedOutputColumnNums: [1]
                  outputColumnNames: _col1
                  input vertices:
                    1 Map 1
                  Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                 HybridGraceHashJoin: true
                  Select Operator
                    expressions: _col1 (type: string)
                    outputColumnNames: _col0
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
-                       projectedOutputColumnNums: [1]
                    Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: _col0 (type: string)
                      sort order: +
                      Reduce Sink Vectorization:
                          className: VectorReduceSinkObjectHashOperator
-                         keyColumnNums: [1]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                         valueColumnNums: []
                     Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
        Execution mode: vectorized, llap
        LLAP IO: all inputs
@@ -16441,27 +18372,14 @@ STAGE PLANS:
            allNative: true
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [0, 1]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
       Reducer 3 
        Execution mode: vectorized, llap
        Reduce Vectorization:
            enabled: true
            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-           reduceColumnNullOrder: a
-           reduceColumnSortOrder: +
           allNative: false
           usesVectorUDFAdaptor: false
          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 1
-              dataColumns: KEY.reducesinkkey0:string
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string)
@@ -16469,7 +18387,6 @@ STAGE PLANS:
           Select Vectorization:
               className: VectorSelectOperator
               native: true
-              projectedOutputColumnNums: [0]
           Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
@@ -16498,10 +18415,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1_n148
 POSTHOOK: Input: default@t2_n87
 #### A masked pattern was here ####
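Reviewer note: because the right side of a LEFT SEMI JOIN is not visible after the join, a predicate on its columns (here b.value <= 'val_20') has to live in the ON clause or inside the subquery; it cannot be expressed in an outer WHERE. The masked block above carries no result rows. Both spellings push the value predicate to the build side:

SELECT a.value FROM t1_n148 a
LEFT SEMI JOIN t2_n87 b ON a.key = b.key AND b.key > 5 AND b.value <= 'val_20'
SORT BY a.value;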
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t2_n87 a left semi join (select key , value from t1_n148 where key > 2) b on a.key = b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -16529,12 +18446,10 @@ STAGE PLANS:
                 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2)
                   predicate: (key > 2) (type: boolean)
                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
@@ -16543,16 +18458,13 @@ STAGE PLANS:
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
-                        projectedOutputColumnNums: [0]
                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       Group By Vectorization:
                           className: VectorGroupByOperator
                           groupByMode: HASH
-                          keyExpressions: col 0:int
                          native: false
                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
                      keys: _col0 (type: int)
                      mode: hash
                      outputColumnNames: _col0
@@ -16563,10 +18475,8 @@ STAGE PLANS:
                        Map-reduce partition columns: _col0 (type: int)
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkLongOperator
-                           keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                           valueColumnNums: []
                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -16579,12 +18489,6 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
-            rowBatchContext:
-                dataColumnCount: 2
-                includeColumns: [0]
-                dataColumns: key:int, value:string
-                partitionColumnCount: 0
-                scratchColumnTypeNames: []
        Map 2 
         Map Operator Tree:
            TableScan
@@ -16593,12 +18497,10 @@ STAGE PLANS:
              Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
-                 vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
              Filter Operator
                Filter Vectorization:
                    className: VectorFilterOperator
                    native: true
-                   predicateExpression: SelectColumnIsNotNull(col 0:int)
                predicate: key is not null (type: boolean)
                Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                Map Join Operator
@@ -16608,26 +18510,21 @@ STAGE PLANS:
                    0 key (type: int)
                    1 _col0 (type: int)
                  Map Join Vectorization:
-                      bigTableKeyColumnNums: [0]
-                      bigTableRetainedColumnNums: [0, 1]
-                      bigTableValueColumnNums: [0, 1]
                      className: VectorMapJoinLeftSemiLongOperator
                      native: true
                      nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                      projectedOutputColumnNums: [0, 1]
                  outputColumnNames: _col0, _col1
                  input vertices:
                    1 Map 1
                  Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
+                 HybridGraceHashJoin: true
                  Reduce Output Operator
                    key expressions: _col0 (type: int), _col1 (type: string)
                    sort order: ++
                    Reduce Sink Vectorization:
                        className: VectorReduceSinkObjectHashOperator
-                       keyColumnNums: [0, 1]
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       valueColumnNums: []
                   Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
        Execution mode: vectorized, llap
        LLAP IO: all inputs
@@ -16640,27 +18537,14 @@ STAGE PLANS:
            allNative: true
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [0, 1]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
       Reducer 3 
        Execution mode: vectorized, llap
        Reduce Vectorization:
            enabled: true
            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-           reduceColumnNullOrder: aa
-           reduceColumnSortOrder: ++
           allNative: false
           usesVectorUDFAdaptor: false
          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 2
-              dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
@@ -16668,7 +18552,6 @@
           Select Vectorization:
               className: VectorSelectOperator
               native: true
-              projectedOutputColumnNums: [0, 1]
           Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
@@ -16702,10 +18585,10 @@ POSTHOOK: Input: default@t2_n87
 10 val_5
 4 val_2
 8 val_4
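Reviewer note: the next case pins the build side explicitly with a MAPJOIN hint instead of relying on size-based conversion; the hint names the table to materialize as the hash table:

-- Hint syntax as used in this file (b becomes the broadcast side).
SELECT /*+ MAPJOIN(b) */ a.key
FROM t3_n35 a LEFT SEMI JOIN t1_n148 b ON a.key = b.key
SORT BY a.key;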
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select /*+ mapjoin(b) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key sort by a.key
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -16733,12 +18616,10 @@ STAGE PLANS:
                 Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
                 TableScan Vectorization:
                     native: true
-                    vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                 Filter Operator
                   Filter Vectorization:
                       className: VectorFilterOperator
                       native: true
-                      predicateExpression: SelectColumnIsNotNull(col 0:int)
                   predicate: key is not null (type: boolean)
                   Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
@@ -16748,26 +18629,21 @@ STAGE PLANS:
                       0 key (type: int)
                       1 _col0 (type: int)
                     Map Join Vectorization:
-                        bigTableKeyColumnNums: [0]
-                        bigTableRetainedColumnNums: [0]
-                        bigTableValueColumnNums: [0]
                         className: VectorMapJoinLeftSemiLongOperator
                         native: true
                         nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                        projectedOutputColumnNums: [0]
                     outputColumnNames: _col0
                     input vertices:
                       1 Map 3
                     Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
                     Reduce Output Operator
                       key expressions: _col0 (type: int)
                       sort order: +
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkObjectHashOperator
-                          keyColumnNums: [0]
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: []
                      Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
         Execution mode: vectorized, llap
         LLAP IO: all inputs
@@ -16780,12 +18656,6 @@ STAGE PLANS:
             allNative: true
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [0]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
        Map 3 
         Map Operator Tree:
            TableScan
@@ -16794,12 +18664,10 @@ STAGE PLANS:
              Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
              TableScan Vectorization:
                  native: true
-                 vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
              Filter Operator
                Filter Vectorization:
                    className: VectorFilterOperator
                    native: true
-                   predicateExpression: SelectColumnIsNotNull(col 0:int)
                predicate: key is not null (type: boolean)
                Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                Select Operator
@@ -16808,16 +18676,13 @@ STAGE PLANS:
                  Select Vectorization:
                      className: VectorSelectOperator
                      native: true
-                     projectedOutputColumnNums: [0]
                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    Group By Vectorization:
                        className: VectorGroupByOperator
                        groupByMode: HASH
-                       keyExpressions: col 0:int
                       native: false
                       vectorProcessingMode: HASH
-                       projectedOutputColumnNums: []
                   keys: _col0 (type: int)
                   mode: hash
                   outputColumnNames: _col0
@@ -16828,10 +18693,8 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: int)
                     Reduce Sink Vectorization:
                         className: VectorReduceSinkLongOperator
-                        keyColumnNums: [0]
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
        Execution mode: vectorized, llap
        LLAP IO: all inputs
@@ -16844,27 +18707,14 @@ STAGE PLANS:
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
-           rowBatchContext:
-               dataColumnCount: 2
-               includeColumns: [0]
-               dataColumns: key:int, value:string
-               partitionColumnCount: 0
-               scratchColumnTypeNames: []
       Reducer 2 
        Execution mode: vectorized, llap
        Reduce Vectorization:
            enabled: true
            enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-           reduceColumnNullOrder: a
-           reduceColumnSortOrder: +
           allNative: false
           usesVectorUDFAdaptor: false
          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 1
-              dataColumns: KEY.reducesinkkey0:int
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: int)
@@ -16872,7 +18722,6 @@ STAGE PLANS:
           Select Vectorization:
               className: VectorSelectOperator
              native: true
-              projectedOutputColumnNums: [0]
          Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -16920,10 +18769,10 @@ POSTHOOK: Input: default@t3_n35
 8
 8
 9
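Reviewer note: the next query joins on an expression key (a.key = 2*b.key). The small side evaluates 2*key before shipping rows — its Reduce Sink partitions on (2 * _col0) — while the big side still probes on the plain key column:

-- Expression join key; evaluated on the build side per the plan below.
SELECT * FROM t1_n148 a
LEFT SEMI JOIN t2_n87 b ON a.key = 2 * b.key
SORT BY a.key, a.value;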
projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16998,12 +18840,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -17012,12 +18848,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) predicate: (2 * key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -17026,16 +18860,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -17046,11 +18877,8 @@ STAGE PLANS: Map-reduce partition columns: (2 * _col0) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] - keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17063,27 +18891,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - 
scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -17091,7 +18906,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -17124,10 +18938,10 @@ POSTHOOK: Input: default@t2_n87 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1_n148 a join t2_n87 b on a.key = b.key left semi join t3_n35 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -17155,53 +18969,58 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17210,15 +19029,9 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [string] Map 3 Map Operator Tree: TableScan @@ -17227,12 +19040,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -17241,10 +19052,8 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap @@ -17258,12 +19067,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - 
dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -17272,12 +19075,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -17286,16 +19087,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -17306,10 +19104,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17322,27 +19118,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -17350,14 +19133,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17395,10 +19177,10 @@ POSTHOOK: Input: default@t3_n35 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: 
explain vectorization detail
+PREHOOK: query: explain vectorization operator
 select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
 select * from t3_n35 a left semi join t1_n148 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -17426,12 +19208,10 @@ STAGE PLANS:
                   Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string))
                     predicate: (key is not null and value is not null) (type: boolean)
                     Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
@@ -17441,26 +19221,21 @@
                         0 key (type: int), value (type: string)
                         1 _col0 (type: int), _col1 (type: string)
                       Map Join Vectorization:
-                          bigTableKeyColumnNums: [0, 1]
-                          bigTableRetainedColumnNums: [0, 1]
-                          bigTableValueColumnNums: [0, 1]
                           className: VectorMapJoinLeftSemiMultiKeyOperator
                           native: true
                           nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                          projectedOutputColumnNums: [0, 1]
                       outputColumnNames: _col0, _col1
                       input vertices:
                         1 Map 3
                       Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
                         Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -17473,12 +19248,6 @@
               allNative: true
               usesVectorUDFAdaptor: false
               vectorized: true
-              rowBatchContext:
-                  dataColumnCount: 2
-                  includeColumns: [0, 1]
-                  dataColumns: key:int, value:string
-                  partitionColumnCount: 0
-                  scratchColumnTypeNames: []
         Map 3
             Map Operator Tree:
                 TableScan
@@ -17487,12 +19256,10 @@
                   Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string))
                     predicate: (key is not null and value is not null) (type: boolean)
                     Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -17501,16 +19268,13 @@
                         Select Vectorization:
                             className: VectorSelectOperator
                             native: true
-                            projectedOutputColumnNums: [0, 1]
                         Statistics: Num rows: 11
Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -17521,10 +19285,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17537,27 +19299,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -17565,7 +19314,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -17608,10 +19356,10 @@ POSTHOOK: Input: default@t3_n35 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -17639,44 +19387,50 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 - Left Semi Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17685,15 +19439,9 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -17702,12 +19450,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is 
not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -17716,16 +19462,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -17736,10 +19479,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17749,15 +19490,231 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: c + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+                Select Operator
+                  expressions: KEY.reducesinkkey0 (type: int)
+                  outputColumnNames: _col0
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1_n148
+PREHOOK: Input: default@t2_n87
+PREHOOK: Input: default@t3_n35
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1_n148
+POSTHOOK: Input: default@t2_n87
+POSTHOOK: Input: default@t3_n35
+#### A masked pattern was here ####
+0
+0
+0
+0
+0
+0
+10
+10
+10
+10
+4
+4
+8
+8
+PREHOOK: query: explain vectorization operator
+select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization operator
+select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Map Join Operator
+                    condition map:
+                         Left Outer Join 0 to 1
+                    keys:
+                      0 key (type: int)
+                      1 key (type: int)
+                    Map Join Vectorization:
+                        className: VectorMapJoinOuterLongOperator
+                        native: true
+                        nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+                    outputColumnNames: _col0, _col5
+                    input vertices:
+                      1 Map 3
+                    Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                    HybridGraceHashJoin: true
+                    Filter Operator
+                      Filter Vectorization:
+                          className: VectorFilterOperator
+                          native: true
+                      predicate: _col5 is not null (type: boolean)
+                      Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Left Semi Join 0 to 1
+                        keys:
+                          0 _col5 (type: int)
+                          1 _col0 (type: int)
+                        Map Join Vectorization:
+                            className: VectorMapJoinLeftSemiLongOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                        outputColumnNames: _col0
+                        input vertices:
+                          1 Map 4
+                        Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+                        HybridGraceHashJoin: true
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkObjectHashOperator
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Map 3
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkLongOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0]
-                    dataColumns: key:int, value:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
         Map 4
             Map Operator Tree:
                 TableScan
@@ -17766,12 +19723,10 @@
                   Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
-                      vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -17780,16 +19735,13 @@
                        Select Vectorization:
                            className: VectorSelectOperator
                            native: true
-                           projectedOutputColumnNums: [0]
                        Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE
                        Group By Operator
                          Group By Vectorization:
                              className: VectorGroupByOperator
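
For reference, the rewritten plan above replaces the old three-way map join (which fell back to the non-native VectorMapJoinOperator with "One MapJoin Condition IS false") with two chained binary map joins, each handled by a native operator (VectorMapJoinOuterLongOperator, then VectorMapJoinLeftSemiLongOperator). A minimal HiveQL sketch of how to reproduce this plan shape on the test tables; the set statements are an assumption, naming only knobs that appear in the plan's nativeConditionsMet lists:

-- Hedged sketch: assumes the t1_n148/t2_n87/t3_n35 tables from this test exist.
set hive.execution.engine=tez;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.mapjoin.native.enabled=true;
set hive.mapjoin.optimized.hashtable=true;
explain vectorization operator
select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key;
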
groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -17800,10 +19752,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17816,27 +19766,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -17844,14 +19781,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17863,13 +19799,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key left semi join t2_n87 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -17881,6 +19817,18 @@ POSTHOOK: Input: default@t3_n35 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -17889,11 +19837,11 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: 
query: explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -17908,48 +19856,26 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17958,34 +19884,25 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - 
includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17997,51 +19914,46 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18053,27 +19965,46 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -18081,14 +20012,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -18100,13 +20030,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3_n35 a left outer join t1_n148 b on a.key = b.key left semi join 
t2_n87 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -18138,10 +20068,13 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization detail +NULL +NULL +NULL +PREHOOK: query: explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t1_n148 a full outer join t3_n35 b on a.key = b.key left semi join t2_n87 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -18157,7 +20090,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -18168,17 +20101,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18191,12 +20121,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -18204,17 +20128,14 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18227,51 +20148,46 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan alias: c + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 
Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18283,44 +20199,46 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 
_col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -18328,14 +20246,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -18388,10 +20305,10 @@ POSTHOOK: Input: default@t3_n35 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -18407,29 +20324,62 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: 
COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18441,51 +20391,46 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 
(type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18497,30 +20442,21 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18533,44 +20469,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false 
vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -18578,14 +20484,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -18638,11 +20543,11 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -18657,7 +20562,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -18665,21 +20571,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18691,51 +20616,46 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18747,12 +20667,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -18760,17 +20674,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18783,44 +20694,32 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -18828,14 +20727,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE 
Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -18847,13 +20745,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1_n148 PREHOOK: Input: default@t2_n87 PREHOOK: Input: default@t3_n35 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key right outer join t1_n148 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n148 POSTHOOK: Input: default@t2_n87 @@ -18881,19 +20779,30 @@ POSTHOOK: Input: default@t3_n35 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t1_n148 b on a.key = b.key full outer join t2_n87 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -18909,7 +20818,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -18917,21 +20827,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + filterExpr: key is not null (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, 
Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18943,51 +20872,46 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan alias: b + filterExpr: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 
11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18999,12 +20923,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -19012,17 +20930,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19035,44 +20950,32 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -19080,14 +20983,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -19153,10 +21055,10 @@ POSTHOOK: Input: default@t3_n35 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.key = b.key left outer join t1_n148 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -19184,12 +21086,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -19199,17 +21099,14 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Map Join Operator condition map: Left Outer Join 0 to 1 @@ -19217,26 +21114,21 @@ STAGE PLANS: 0 _col1 (type: string) 1 value (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: 1 Map 4 Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19249,12 +21141,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - 
partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -19263,12 +21149,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -19277,16 +21161,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -19297,10 +21178,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19313,12 +21192,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -19326,17 +21199,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19349,27 +21219,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: 
Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -19377,7 +21234,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -19443,10 +21299,10 @@ POSTHOOK: Input: default@t3_n35 4 8 8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3_n35 a left semi join t2_n87 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -19473,12 +21329,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 100), SelectColumnIsNotNull(col 1:string)) predicate: ((key > 100) and value is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -19487,7 +21341,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -19496,17 +21349,14 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] className: VectorMapJoinLeftSemiStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true File Output Operator compressed: false File Sink Vectorization: @@ -19528,12 +21378,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -19542,12 +21386,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 1:string) predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -19556,16 +21398,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By 
Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -19576,10 +21415,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19592,12 +21429,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_like_2.q.out ql/src/test/results/clientpositive/llap/vector_like_2.q.out index 849d6c6..5125d40 100644 --- ql/src/test/results/clientpositive/llap/vector_like_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_like_2.q.out @@ -63,10 +63,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:boolean Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out index f503761..c12254c 100644 --- ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out +++ ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out @@ -104,10 +104,9 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:varchar(10), 1:int, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 10 Data size: 2820 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out index 79d7746..3e20484 100644 --- ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out @@ -167,11 +167,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1] + partitionColumns: 0:int + valueColumns: 1:string Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -223,14 +223,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 4, 1] - smallTableMapping: [4] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 0:int, 4:string, 1:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 @@ -248,10 +250,9 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 4, 1] + keyColumns: 0:int, 4:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 399 Data size: 74214 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 94733cf..3364035 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -203,6 +203,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 3 @@ -217,6 +218,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 @@ -477,6 +479,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3, _col4 input vertices: 1 Map 3 @@ -491,6 +494,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 diff --git ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out index e81d7df..58c00b6 100644 --- ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out +++ ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out @@ -354,6 +354,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -383,6 +386,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out index 473514f..78d1e38 100644 --- ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out @@ -846,26 +846,141 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -10 NULL NULL 10 -100 100 100 100 -NULL 10 10 NULL -NULL 10 48 NULL -NULL 10 NULL NULL -NULL 35 10 NULL -NULL 35 48 NULL -NULL 35 NULL NULL -NULL NULL 10 NULL -NULL NULL 48 NULL -NULL NULL NULL 35 -NULL NULL NULL NULL + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 48 Basic 
stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[value]] + keys: + 0 key (type: int) + 1 value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -1745,26 +1860,141 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -10 NULL NULL 10 -100 100 100 100 -NULL 10 10 NULL -NULL 10 48 NULL -NULL 10 NULL NULL -NULL 35 10 NULL -NULL 35 48 NULL -NULL 35 NULL NULL -NULL NULL 10 NULL -NULL NULL 48 NULL -NULL NULL NULL 35 -NULL NULL NULL NULL + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: 
+ TableScan + alias: b + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[value]] + keys: + 0 key (type: int) + 1 value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out index 7bc7fb2..5e7e28d 100644 --- ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out @@ -1492,6 +1492,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + 
enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1638,6 +1641,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1811,6 +1817,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1949,6 +1958,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -2110,6 +2122,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_order_null.q.out ql/src/test/results/clientpositive/llap/vector_order_null.q.out index cb4053e..db428e6 100644 --- ql/src/test/results/clientpositive/llap/vector_order_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_order_null.q.out @@ -116,10 +116,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -238,10 +237,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -360,10 +358,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] 
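[Reviewer note] The vector_order_null.q.out churn above and below is mechanical: the explain fields keyColumnNums/valueColumnNums (bare ordinals) become keyColumns/valueColumns and now print each ordinal with its type, and empty "valueColumnNums: []" lines are dropped outright. The values line up one-for-one (e.g. [0, 1] becomes 0:int, 1:string), so only the presentation changes. A minimal sketch of that ordinal:type rendering, with hypothetical names (the patch's real formatting code lives elsewhere in the vectorizer):

import java.util.List;

// Illustrative only: the "ordinal:type" rendering used by the renamed
// keyColumns / valueColumns / partitionColumns explain fields.
public final class ColumnListRendering {

  static String render(List<Integer> columnNums, List<String> typeNames) {
    if (columnNums.size() != typeNames.size()) {
      throw new IllegalArgumentException("one type name per column ordinal expected");
    }
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < columnNums.size(); i++) {
      if (i > 0) {
        sb.append(", ");
      }
      sb.append(columnNums.get(i)).append(':').append(typeNames.get(i));
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    // Mirrors the hunks here: keyColumnNums: [0, 1] becomes "0:int, 1:string".
    System.out.println(render(List.of(0, 1), List.of("int", "string")));
  }
}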
Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -482,10 +479,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -604,10 +600,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -726,10 +721,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -848,10 +842,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -970,10 +963,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1092,10 +1084,9 @@ STAGE PLANS: sort order: +- Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1214,10 +1205,9 @@ STAGE PLANS: sort order: -- Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1336,10 +1326,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out index aa0a46f..70fd697 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out @@ -107,15 +107,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableOuterKeyMapping: 1 -> 3 - bigTableRetainedColumnNums: [0, 1, 3] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 4] - smallTableMapping: [4] + outerSmallTableKeyMapping: 1 -> 3 + projectedOutput: 0:string, 1:int, 3:int, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -169,10 +170,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:string Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -259,10 +260,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - 
keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -305,15 +306,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableOuterKeyMapping: 0 -> 4 - bigTableRetainedColumnNums: [0, 1, 4] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 4, 0, 1] - smallTableMapping: [3] + outerSmallTableKeyMapping: 0 -> 4 + projectedOutput: 3:string, 4:int, 0:int, 1:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index c74a588..f7a766a 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -267,15 +267,16 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableOuterKeyMapping: 2 -> 15 - bigTableRetainedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15] - bigTableValueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableValueColumns: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - smallTableMapping: [13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24] + outerSmallTableKeyMapping: 2 -> 15 + projectedOutput: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean, 13:tinyint, 14:smallint, 15:int, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + smallTableValueMapping: 13:tinyint, 14:smallint, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 @@ -329,10 +330,10 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11] + valueColumns: 0:tinyint, 1:smallint, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap @@ -443,13 +444,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -503,10 +505,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -709,13 +710,14 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 3 @@ -727,13 +729,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 
(type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 4 @@ -754,10 +757,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -799,10 +801,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -843,10 +844,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index 2e90aae..c4f2956 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -284,13 +284,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS 
true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -302,13 +303,14 @@ STAGE PLANS: 0 _col1 (type: bigint) 1 _col0 (type: bigint) Map Join Vectorization: - bigTableKeyColumnNums: [3] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 3:bigint + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 4 @@ -329,10 +331,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -374,10 +375,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -418,10 +418,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index ba2d9df..5482f60 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -296,10 +296,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -353,11 +352,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int keyExpressions: ConstantVectorExpression(val 0) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Reducer 3 @@ -517,10 +516,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(15,2), 1:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -576,11 +575,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 3 @@ -747,10 +746,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -800,10 +799,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true 
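[Reviewer note] Each vectorized operator above advertises "native: true" only because every entry in its condition checklist held; when one fails, the explain switches to a not-met list instead (as in the new MergeJoin Vectorization sections, which report "enabled: false"). A minimal sketch of that met/not-met bookkeeping, with hypothetical names rather than Hive's own classes:

import java.util.ArrayList;
import java.util.List;

// Illustrative only: partition named boolean conditions into met / not-met
// lists and derive the "native" flag, as the explain output above suggests.
public final class NativeConditionCheck {

  record Condition(String description, boolean met) {}

  public static void main(String[] args) {
    List<Condition> conditions = List.of(
        new Condition("hive.vectorized.execution.reducesink.new.enabled IS true", true),
        new Condition("hive.execution.engine tez IN [tez, spark] IS true", true),
        new Condition("No PTF TopN IS true", true),
        new Condition("No DISTINCT columns IS true", true));

    List<String> met = new ArrayList<>();
    List<String> notMet = new ArrayList<>();
    for (Condition c : conditions) {
      (c.met() ? met : notMet).add(c.description());
    }
    boolean isNative = notMet.isEmpty(); // native only when every condition holds
    System.out.println("native: " + isNative);
    System.out.println(isNative
        ? "nativeConditionsMet: " + String.join(", ", met)
        : "nativeConditionsNotMet: " + String.join(", ", notMet));
  }
}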
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -845,6 +843,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -881,11 +882,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 4 @@ -1056,10 +1057,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1108,10 +1108,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -1154,6 +1154,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1190,11 +1193,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - 
keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 4 @@ -1364,10 +1367,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -1417,10 +1420,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -1463,6 +1466,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Vectorization: @@ -1594,10 +1600,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(17,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(17,2)) Execution mode: vectorized, llap @@ -1651,11 +1656,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int keyExpressions: ConstantVectorExpression(val 0) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 
0:decimal(17,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(17,2)) Reducer 3 @@ -1815,10 +1820,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(7,2), 1:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap @@ -1874,11 +1879,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(7,2), 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(7,2) + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Reducer 3 @@ -2045,10 +2050,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2098,10 +2103,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -2143,6 +2147,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2179,11 +2186,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(7,2)) Reduce Sink Vectorization: 
className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(7,2), 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(7,2) + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Reducer 4 @@ -2354,10 +2361,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -2406,10 +2412,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2452,6 +2458,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2488,11 +2497,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(7,2), 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(7,2) + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Reducer 4 @@ -2662,10 +2671,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2715,10 +2724,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2761,6 +2770,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out index ef4934e..8bec4aa 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out @@ -98,11 +98,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2, 3] + partitionColumns: 0:string + valueColumns: 2:double, 3:bigint Statistics: Num rows: 100 Data size: 18816 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index 44bfe20..ef6cd86 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -142,10 +142,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -402,10 +402,10 @@ STAGE PLANS: Map-reduce partition columns: 
p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -631,10 +631,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -860,11 +860,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1122,11 +1122,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1352,11 +1352,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE 
value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1582,12 +1582,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1845,12 +1845,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2076,12 +2076,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2301,10 +2301,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2529,10 +2529,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2757,10 +2757,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2985,11 +2985,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -3215,11 +3215,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -3445,11 +3445,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -3675,12 +3675,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) 
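[Reviewer note] Several PTF hunks above and below partition by the literal 0, materialized as ConstantVectorExpression(val 0) -> 4:int: with a constant partition key every row hashes to the same reducer, which is what an un-partitioned OVER () window requires. A toy illustration of that routing, assuming the usual non-negative-hash-modulo placement (hypothetical names, not the operator's code):

import java.util.List;
import java.util.Objects;

// Illustrative only: reducer selection by hashing the partition key.
public final class PartitionRouting {

  // Non-negative hash modulo the reducer count, the usual placement trick.
  static int reducerFor(List<?> partitionKey, int numReducers) {
    return (Objects.hash(partitionKey.toArray()) & Integer.MAX_VALUE) % numReducers;
  }

  public static void main(String[] args) {
    int reducers = 4;
    // Constant key 0: both calls return the same reducer index.
    System.out.println(reducerFor(List.of(0), reducers));
    System.out.println(reducerFor(List.of(0), reducers));
    // A varying key such as p_mfgr spreads rows across reducers.
    System.out.println(reducerFor(List.of("Manufacturer#1"), reducers));
    System.out.println(reducerFor(List.of("Manufacturer#2"), reducers));
  }
}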
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [4, 1]
+        keyColumns: 4:int, 1:string
         keyExpressions: ConstantVectorExpression(val 0) -> 4:int
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        partitionColumnNums: [5]
-        valueColumnNums: [0, 2]
+        partitionColumns: 5:int
+        valueColumns: 0:string, 2:double
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_mfgr (type: string), p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -3906,12 +3906,12 @@ STAGE PLANS:
     Map-reduce partition columns: 0 (type: int)
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [4, 1]
+        keyColumns: 4:int, 1:string
         keyExpressions: ConstantVectorExpression(val 0) -> 4:int
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        partitionColumnNums: [5]
-        valueColumnNums: [0, 2]
+        partitionColumns: 5:int
+        valueColumns: 0:string, 2:double
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_mfgr (type: string), p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -4137,12 +4137,12 @@ STAGE PLANS:
     Map-reduce partition columns: 0 (type: int)
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [4, 1]
+        keyColumns: 4:int, 1:string
         keyExpressions: ConstantVectorExpression(val 0) -> 4:int
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        partitionColumnNums: [5]
-        valueColumnNums: [0, 2]
+        partitionColumns: 5:int
+        valueColumns: 0:string, 2:double
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_mfgr (type: string), p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -4410,10 +4410,10 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkStringOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [1, 2]
+        valueColumns: 1:string, 2:decimal(38,18)
     Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_name (type: string), p_retailprice (type: decimal(38,18))
     Execution mode: vectorized, llap
@@ -4638,11 +4638,11 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [0, 1]
+        keyColumns: 0:string, 1:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        partitionColumnNums: [0]
-        valueColumnNums: [2]
+        partitionColumns: 0:string
+        valueColumns: 2:decimal(38,18)
     Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_retailprice (type: decimal(38,18))
     Execution mode: vectorized, llap
@@ -4888,10 +4888,10 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkStringOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [1, 2]
+        valueColumns: 1:string, 2:bigint
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_name (type: string), p_bigint (type: bigint)
     Execution mode: vectorized, llap
@@ -5116,11 +5116,11 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [0, 1]
+        keyColumns: 0:string, 1:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        partitionColumnNums: [0]
-        valueColumnNums: [2]
+        partitionColumns: 0:string
+        valueColumns: 2:bigint
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_bigint (type: bigint)
     Execution mode: vectorized, llap
@@ -5340,10 +5340,10 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkStringOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [2]
+        valueColumns: 2:double
     Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -5539,11 +5539,11 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [0, 1]
+        keyColumns: 0:string, 1:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        partitionColumnNums: [0]
-        valueColumnNums: [2]
+        partitionColumns: 0:string
+        valueColumns: 2:double
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -5740,11 +5740,11 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp)
     Reduce Sink Vectorization:
         className: VectorReduceSinkMultiKeyOperator
-        keyColumnNums: [0, 6]
+        keyColumns: 0:string, 6:timestamp
         keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00) -> 5:timestamp) -> 6:timestamp
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [1, 2]
+        valueColumns: 1:string, 2:double
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_name (type: string), p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -5940,12 +5940,12 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp)
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [0, 6, 1]
+        keyColumns: 0:string, 6:timestamp, 1:string
         keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00) -> 5:timestamp) -> 6:timestamp
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        partitionColumnNums: [0, 9]
-        valueColumnNums: [2]
+        partitionColumns: 0:string, 9:timestamp
+        valueColumns: 2:double
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -6272,10 +6272,10 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkStringOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [2]
+        valueColumns: 2:double
     Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -6471,11 +6471,11 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [0, 1]
+        keyColumns: 0:string, 1:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        partitionColumnNums: [0]
-        valueColumnNums: [2]
+        partitionColumns: 0:string
+        valueColumns: 2:double
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -6672,12 +6672,12 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp)
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [0, 6, 1]
+        keyColumns: 0:string, 6:timestamp, 1:string
         keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00) -> 5:timestamp) -> 6:timestamp
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        partitionColumnNums: [0, 9]
-        valueColumnNums: [2]
+        partitionColumns: 0:string, 9:timestamp
+        valueColumns: 2:double
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_retailprice (type: double)
     Execution mode: vectorized, llap
@@ -6874,11 +6874,11 @@ STAGE PLANS:
     Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp)
     Reduce Sink Vectorization:
         className: VectorReduceSinkMultiKeyOperator
-        keyColumnNums: [0, 6]
+        keyColumns: 0:string, 6:timestamp
         keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00) -> 5:timestamp) -> 6:timestamp
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [1, 2]
+        valueColumns: 1:string, 2:double
     Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: p_name (type: string), p_retailprice (type: double)
     Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
index 038300e..2f604bb 100644
--- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
@@ -90,13 +90,15 @@ STAGE PLANS:
     0 one (type: int), two (type: int)
     1 1 (type: int), 2 (type: int)
     Map Join Vectorization:
-        bigTableKeyColumnNums: [0, 1]
-        bigTableRetainedColumnNums: [0, 1]
-        bigTableValueColumnNums: [0, 1]
+        bigTableKeyColumns: 0:int, 1:int
+        bigTableRetainColumnNums: [0, 1]
+        bigTableValueColumns: 0:int, 1:int
         className: VectorMapJoinInnerBigOnlyMultiKeyOperator
         native: true
         nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-        projectedOutputColumnNums: [0, 1]
+        nonOuterSmallTableKeyMapping: []
+        projectedOutput: 0:int, 1:int
+        hashTableImplementationType: OPTIMIZED
     outputColumnNames: _col0, _col1
     input vertices:
         1 Map 1
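The .q.out churn above and below follows one mechanical pattern: EXPLAIN VECTORIZATION attributes that used to print bare column indexes (keyColumnNums: [2, 1]) now print index:type pairs (keyColumns: 2:string, 1:string), and the Map Join attributes follow suit (bigTableKeyColumns, projectedOutput, and so on). A minimal Java sketch of that rendering, assuming hypothetical names (formatColumns is an illustration, not Hive's actual API):

    import java.util.StringJoiner;

    public final class ColumnNotationSketch {
        // Renders [2, 1] plus {"string", "string"} as "2:string, 1:string",
        // the index:type notation the new EXPLAIN output uses.
        static String formatColumns(int[] columnNums, String[] typeNames) {
            StringJoiner joiner = new StringJoiner(", ");
            for (int i = 0; i < columnNums.length; i++) {
                joiner.add(columnNums[i] + ":" + typeNames[i]);
            }
            return joiner.toString();
        }

        public static void main(String[] args) {
            // prints: 2:string, 1:string
            System.out.println(formatColumns(new int[] {2, 1},
                new String[] {"string", "string"}));
        }
    }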
diff --git ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
index 4a10b3b..0b82230 100644
--- ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
@@ -129,10 +129,9 @@ STAGE PLANS:
     sort order:
     Reduce Sink Vectorization:
         className: VectorReduceSinkEmptyKeyOperator
-        keyColumnNums: []
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        valueColumns: 0:bigint, 1:bigint, 2:double, 3:double, 4:double, 5:double, 6:double, 7:bigint, 8:double, 9:bigint, 10:tinyint
     Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: bigint), _col10 (type: tinyint)
     Execution mode: vectorized, llap
@@ -337,10 +336,9 @@ STAGE PLANS:
     sort order:
     Reduce Sink Vectorization:
         className: VectorReduceSinkEmptyKeyOperator
-        keyColumnNums: []
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        valueColumns: 0:bigint, 1:bigint, 2:double, 3:double, 4:double, 5:double, 6:double, 7:bigint, 8:double, 9:bigint, 10:tinyint
     Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: bigint), _col10 (type: tinyint)
     Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
index 16803c9..bf5cd7e 100644
--- ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
+++ ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
@@ -67,10 +67,10 @@ STAGE PLANS:
     Map-reduce partition columns: _col0 (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkStringOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [1]
+        valueColumns: 1:bigint
     Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
     TopN Hash Memory Usage: 0.1
     value expressions: _col1 (type: bigint)
@@ -126,10 +126,10 @@ STAGE PLANS:
     sort order: +
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [1]
+        valueColumns: 1:bigint
     Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
     TopN Hash Memory Usage: 0.1
     value expressions: _col1 (type: bigint)
@@ -259,10 +259,9 @@ STAGE PLANS:
     Map-reduce partition columns: _col0 (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkStringOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: []
     Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
     TopN Hash Memory Usage: 0.1
     Execution mode: vectorized, llap
@@ -315,10 +314,9 @@ STAGE PLANS:
     sort order: +
     Reduce Sink Vectorization:
         className: VectorReduceSinkObjectHashOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: []
     Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
     TopN Hash Memory Usage: 0.1
     Reducer 3
@@ -434,10 +432,9 @@ STAGE PLANS:
     Map-reduce partition columns: _col0 (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkStringOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: []
     Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
     Execution mode: vectorized, llap
     LLAP IO: no inputs
@@ -486,10 +483,10 @@ STAGE PLANS:
     Map-reduce partition columns: _col0 (type: string)
     Reduce Sink Vectorization:
         className: VectorReduceSinkStringOperator
-        keyColumnNums: [0]
+        keyColumns: 0:string
         native: true
         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-        valueColumnNums: [1]
+        valueColumns: 1:string
     Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
     value expressions: _col1 (type: string)
     Execution mode: vectorized, llap
@@ -530,6 +527,9 @@ STAGE PLANS:
     Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
     TopN Hash Memory Usage: 0.1
     value expressions: _col1 (type: string)
+    MergeJoin Vectorization:
+        enabled: false
+        enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
     Reducer 3
     Execution mode: vectorized, llap
     Reduce Vectorization:
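A second pattern is visible in vector_reuse_scratchcols.q.out and vector_topnkey.q.out above: empty attributes such as keyColumnNums: [] and valueColumnNums: [] are deleted with no replacement, i.e. the new output omits the line entirely when the column list is empty. A hedged sketch of that emission rule (method and parameter names are illustrative, not Hive's):

    // Appends one "keyColumns: 0:string, 1:int"-style line to the report,
    // skipping empty lists entirely instead of printing "label: []" as the
    // old output format did.
    static void appendColumnLine(StringBuilder out, String label,
                                 int[] columnNums, String[] typeNames) {
        if (columnNums.length == 0) {
            return;  // old format: "label: []"; new format: no line at all
        }
        out.append(label).append(": ");
        for (int i = 0; i < columnNums.length; i++) {
            if (i > 0) {
                out.append(", ");
            }
            out.append(columnNums[i]).append(':').append(typeNames[i]);
        }
        out.append('\n');
    }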
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:string, 1:varchar(20) Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap @@ -2943,10 +2942,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:string, 1:varchar(20) Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 53327bd..f924f04 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -43,11 +43,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -279,11 +279,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2] - valueColumnNums: [3] + partitionColumns: 0:string, 1:string, 2:int + valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode: vectorized, llap @@ -339,11 +339,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - 
valueColumnNums: [2, 3] + partitionColumns: 1:string + valueColumns: 2:int, 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double) Reducer 3 @@ -529,11 +529,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2] - valueColumnNums: [3] + partitionColumns: 0:string, 1:string, 2:int + valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode: vectorized, llap @@ -589,11 +589,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2, 3] + partitionColumns: 1:string + valueColumns: 2:int, 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double) Reducer 3 @@ -748,11 +748,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -942,11 +942,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1143,11 +1143,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string 
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1350,11 +1350,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1396,11 +1396,10 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:int Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1471,6 +1470,9 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Vectorization: @@ -1636,11 +1638,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1, 5] + keyColumns: 2:string, 1:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [] + partitionColumns: 2:string Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1826,11 +1827,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2035,11 +2036,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2246,11 +2247,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2436,12 +2437,12 @@ STAGE PLANS: Map-reduce partition columns: 'Manufacturer#3' (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 1] + keyColumns: 10:string, 1:string keyExpressions: ConstantVectorExpression(val Manufacturer#3) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [5] + partitionColumns: 11:string + valueColumns: 5:int Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2603,11 +2604,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2769,11 +2770,11 @@ STAGE PLANS: 
Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2988,11 +2989,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3279,11 +3280,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3557,11 +3558,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3658,11 +3659,11 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [3, 4, 2] + partitionColumns: 0:string + valueColumns: 3:bigint, 4:bigint, 2:int 
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int) Reducer 3 @@ -3809,11 +3810,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4022,11 +4023,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:int, 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [4, 5] + partitionColumns: 0:string, 1:string, 2:int, 3:double + valueColumns: 4:double, 5:double Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col4 (type: double), _col5 (type: double) Execution mode: vectorized, llap @@ -4082,11 +4083,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2, 3, 4, 5] + partitionColumns: 1:string + valueColumns: 2:int, 3:double, 4:double, 5:double Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double) Reducer 3 @@ -4246,11 +4247,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4442,11 +4443,11 @@ STAGE PLANS: Map-reduce partition 
columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4709,11 +4710,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double) Execution mode: vectorized, llap @@ -4990,11 +4991,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [3, 7] + partitionColumns: 2:string + valueColumns: 3:string, 7:double Statistics: Num rows: 26 Data size: 8294 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_brand (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -5478,11 +5479,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Reduce Output Operator @@ -5491,11 +5492,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Reduce Output Operator @@ -5504,11 +5505,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5892,11 +5893,11 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [3, 4, 2] + partitionColumns: 0:string + valueColumns: 3:bigint, 4:bigint, 2:int Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int) Reducer 9 @@ -6262,11 +6263,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2] - valueColumnNums: [3] + partitionColumns: 0:string, 1:string, 2:int + valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode: vectorized, llap @@ -6322,11 +6323,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2, 3] + partitionColumns: 1:string + valueColumns: 2:int, 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double) Reducer 3 @@ -6485,11 +6486,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - 
keyColumnNums: [2, 5] + keyColumns: 2:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1] + partitionColumns: 2:string + valueColumns: 1:string Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string) Execution mode: vectorized, llap @@ -6649,11 +6650,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -6805,11 +6806,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -6967,11 +6968,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7139,11 +7140,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap 
@@ -7305,11 +7306,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7481,11 +7482,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7661,11 +7662,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7840,11 +7841,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -8037,11 +8038,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 
5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -8232,11 +8233,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [7] + partitionColumns: 2:string + valueColumns: 7:double Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -8455,11 +8456,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2, 1] - valueColumnNums: [5, 7] + partitionColumns: 2:string, 1:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -8555,11 +8556,11 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [4, 5, 2, 3] + partitionColumns: 0:string, 1:string + valueColumns: 4:double, 5:double, 2:int, 3:double Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: sum_window_0 (type: double), min_window_1 (type: double), _col5 (type: int), _col7 (type: double) Reducer 3 @@ -8729,12 +8730,12 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 10] + keyColumns: 2:string, 10:string keyExpressions: StringSubstrColStart(col 4:string, start 1) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [4] + partitionColumns: 2:string + valueColumns: 4:string Statistics: Num rows: 26 Data size: 5252 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_type (type: string) Execution mode: vectorized, llap @@ -8918,11 +8919,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9105,11 +9106,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 5] + keyColumns: 2:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1, 7] + partitionColumns: 2:string + valueColumns: 1:string, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9292,11 +9293,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9446,11 +9447,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 5] + keyColumns: 2:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1, 7] + partitionColumns: 2:string + valueColumns: 1:string, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9603,12 +9604,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10] + keyColumns: 10:int keyExpressions: ConstantVectorExpression(val 0) -> 10:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
@@ -9603,12 +9604,12 @@ STAGE PLANS:
                         Map-reduce partition columns: 0 (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [10]
+                            keyColumns: 10:int
                             keyExpressions: ConstantVectorExpression(val 0) -> 10:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [11]
-                            valueColumnNums: [1, 7]
+                            partitionColumns: 11:int
+                            valueColumns: 1:string, 7:double
                         Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: p_name (type: string), p_retailprice (type: double)
             Execution mode: vectorized, llap
@@ -9698,10 +9699,10 @@ STAGE PLANS:
                         sort order: +
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [1]
+                            keyColumns: 1:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [2, 5]
+                            valueColumns: 2:double, 5:double
                         Statistics: Num rows: 26 Data size: 3562 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: double), _col2 (type: double)
         Reducer 3
@@ -9837,12 +9838,11 @@ STAGE PLANS:
                         Map-reduce partition columns: 'Manufacturer#6' (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [10, 5]
+                            keyColumns: 10:string, 5:int
                             keyExpressions: ConstantVectorExpression(val Manufacturer#6) -> 10:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [11]
-                            valueColumnNums: []
+                            partitionColumns: 11:string
                         Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -10010,12 +10010,12 @@ STAGE PLANS:
                         Map-reduce partition columns: 'Manufacturer#1' (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [10, 1]
+                            keyColumns: 10:string, 1:string
                             keyExpressions: ConstantVectorExpression(val Manufacturer#1) -> 10:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [11]
-                            valueColumnNums: [7]
+                            partitionColumns: 11:string
+                            valueColumns: 7:double
                         Statistics: Num rows: 5 Data size: 1135 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: p_retailprice (type: double)
             Execution mode: vectorized, llap
@@ -10159,12 +10159,12 @@ STAGE PLANS:
                         Map-reduce partition columns: 'm1' (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [10]
+                            keyColumns: 10:string
                             keyExpressions: ConstantVectorExpression(val m1) -> 10:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [11]
-                            valueColumnNums: [5]
+                            partitionColumns: 11:string
+                            valueColumns: 5:int
                         Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: p_size (type: int)
             Execution mode: vectorized, llap
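All of the q.out hunks in this series reflect one mechanical change to the EXPLAIN VECTORIZATION plan output: the bare index arrays keyColumnNums/partitionColumnNums/valueColumnNums become keyColumns/partitionColumns/valueColumns, with each entry pairing the column number with its type (so [2, 1] is rendered as 2:string, 1:string), and an empty valueColumnNums: [] line is dropped instead of printing an empty list. A minimal Java sketch of the formatting difference follows; it is illustrative only, not Hive's actual rendering code, and ColumnAnnotation with its methods are names invented for this example:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class ColumnAnnotation {

      // Old style: bare column numbers, e.g. "[2, 1]".
      static String formatColumnNums(int[] columnNums) {
        return Arrays.toString(columnNums);
      }

      // New style: "colNum:typeName" pairs, e.g. "2:string, 1:string".
      static String formatTypedColumns(int[] columnNums, String[] typeNames) {
        List<String> parts = new ArrayList<>();
        for (int i = 0; i < columnNums.length; i++) {
          parts.add(columnNums[i] + ":" + typeNames[i]);
        }
        return String.join(", ", parts);
      }

      public static void main(String[] args) {
        int[] cols = {2, 1};
        String[] types = {"string", "string"};
        System.out.println("keyColumnNums: " + formatColumnNums(cols));       // keyColumnNums: [2, 1]
        System.out.println("keyColumns: " + formatTypedColumns(cols, types)); // keyColumns: 2:string, 1:string
        // An empty value-column list now produces no output line at all,
        // rather than printing "valueColumnNums: []".
      }
    }

Carrying the type next to each column number makes the reduce-sink annotations self-describing: the reader no longer has to cross-reference the vertex's column schema to interpret an index list like [2, 1].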
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
index 5ea866b..d4b3fa3 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
@@ -89,11 +89,11 @@ STAGE PLANS:
                         Map-reduce partition columns: p_mfgr (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 7]
+                            keyColumns: 2:string, 7:double
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: [5]
+                            partitionColumns: 2:string
+                            valueColumns: 5:int
                         Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: p_size (type: int)
             Execution mode: vectorized, llap
@@ -282,11 +282,11 @@ STAGE PLANS:
                         Map-reduce partition columns: p_mfgr (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 7]
+                            keyColumns: 2:string, 7:double
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: [5]
+                            partitionColumns: 2:string
+                            valueColumns: 5:int
                         Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: p_size (type: int)
             Execution mode: vectorized, llap
@@ -476,11 +476,10 @@ STAGE PLANS:
                         Map-reduce partition columns: t (type: tinyint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 6, 7, 1, 4]
+                            keyColumns: 0:tinyint, 6:boolean, 7:string, 1:smallint, 4:float
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
+                            partitionColumns: 0:tinyint
                         Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -699,11 +698,10 @@ STAGE PLANS:
                         Map-reduce partition columns: si (type: smallint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [1, 2, 7]
+                            keyColumns: 1:smallint, 2:int, 7:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [1]
-                            valueColumnNums: []
+                            partitionColumns: 1:smallint
                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -922,11 +920,10 @@ STAGE PLANS:
                         Map-reduce partition columns: b (type: bigint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [3, 1, 7, 5]
+                            keyColumns: 3:bigint, 1:smallint, 7:string, 5:double
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [3]
-                            valueColumnNums: []
+                            partitionColumns: 3:bigint
                         Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -1145,11 +1142,11 @@ STAGE PLANS:
                         Map-reduce partition columns: f (type: float)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [4, 3]
+                            keyColumns: 4:float, 3:bigint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [4]
-                            valueColumnNums: [7]
+                            partitionColumns: 4:float
+                            valueColumns: 7:string
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
                         value expressions: s (type: string)
             Execution mode: vectorized, llap
@@ -1369,10 +1366,10 @@ STAGE PLANS:
                         Map-reduce partition columns: p_mfgr (type: string), p_type (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkMultiKeyOperator
-                            keyColumnNums: [2, 4]
+                            keyColumns: 2:string, 4:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [7]
+                            valueColumns: 7:double
                         Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: p_retailprice (type: double)
             Execution mode: vectorized, llap
@@ -1546,11 +1543,11 @@ STAGE PLANS:
                         Map-reduce partition columns: p_mfgr (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 4]
+                            keyColumns: 2:string, 4:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: [7]
+                            partitionColumns: 2:string
+                            valueColumns: 7:double
                         Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: p_retailprice (type: double)
             Execution mode: vectorized, llap
@@ -1744,11 +1741,11 @@ STAGE PLANS:
                         Map-reduce partition columns: ts (type: timestamp)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [8, 2]
+                            keyColumns: 8:timestamp, 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [8]
-                            valueColumnNums: [7]
+                            partitionColumns: 8:timestamp
+                            valueColumns: 7:string
                         Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
                         value expressions: s (type: string)
             Execution mode: vectorized, llap
@@ -1987,11 +1984,11 @@ STAGE PLANS:
                         Map-reduce partition columns: p_mfgr (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 7]
+                            keyColumns: 2:string, 7:double
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: [5]
+                            partitionColumns: 2:string
+                            valueColumns: 5:int
                         Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: p_size (type: int)
             Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out
index d9b62a9..f95d129 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out
@@ -59,10 +59,10 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
-                            keyColumnNums: [1]
+                            keyColumns: 1:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [2, 4]
+                            valueColumns: 2:int, 4:boolean
                         Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: int), _col2 (type: boolean)
             Execution mode: vectorized, llap
@@ -112,10 +112,10 @@ STAGE PLANS:
                         Map-reduce partition columns: _col1 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
-                            keyColumnNums: [6]
+                            keyColumns: 6:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [2]
+                            valueColumns: 2:int
                         Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int)
             Execution mode: vectorized, llap
@@ -158,6 +158,9 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: boolean)
                         Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                MergeJoin Vectorization:
+                    enabled: false
+                    enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -202,12 +205,12 @@ STAGE PLANS:
                         Map-reduce partition columns: 0 (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [3, 6]
+                            keyColumns: 3:int, 6:double
                             keyExpressions: ConstantVectorExpression(val 0) -> 3:int, DoubleColDivideDoubleColumn(col 4:double, col 5:double)(children: CastLongToDouble(col 1:bigint) -> 4:double, CastLongToDouble(col 2:bigint) -> 5:double) -> 6:double
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: [1, 2]
+                            partitionColumns: 7:int
+                            valueColumns: 1:bigint, 2:bigint
                         Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 4
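Besides the column rename, these gby plans gain an explicit negative annotation: a merge join that cannot be vectorized now says so in the plan, with the failed precondition spelled out in the same "<condition> IS <boolean>" style already used by nativeConditionsMet. A hypothetical Java sketch of that reporting pattern follows; VectorizationReport and check are invented names for illustration, not Hive's API:

    import java.util.ArrayList;
    import java.util.List;

    public class VectorizationReport {
      private final List<String> met = new ArrayList<>();
      private final List<String> notMet = new ArrayList<>();

      // Record one precondition in the "<condition> IS <result>" style of the plan output.
      void check(String description, boolean passed) {
        (passed ? met : notMet).add(description + " IS " + passed);
      }

      public static void main(String[] args) {
        VectorizationReport report = new VectorizationReport();
        report.check("Vectorizing MergeJoin Supported", false);
        boolean enabled = report.notMet.isEmpty();
        System.out.println("enabled: " + enabled);
        if (!enabled) {
          System.out.println("enableConditionsNotMet: " + String.join(", ", report.notMet));
        }
      }
    }

Run as-is, the sketch prints "enabled: false" followed by "enableConditionsNotMet: Vectorizing MergeJoin Supported IS false", matching the three lines these hunks add to the plans.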
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
index 55908e5..726a4f7 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
@@ -62,10 +62,10 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
-                            keyColumnNums: [0]
+                            keyColumns: 0:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [1]
+                            valueColumns: 1:bigint
                         Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
@@ -129,12 +129,11 @@ STAGE PLANS:
                         Map-reduce partition columns: 0 (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 1]
+                            keyColumns: 2:int, 1:bigint
                             keyExpressions: ConstantVectorExpression(val 0) -> 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [3]
-                            valueColumnNums: []
+                            partitionColumns: 3:int
                         Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 3
             Execution mode: vectorized, llap
@@ -304,10 +303,10 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkLongOperator
-                            keyColumnNums: [0]
+                            keyColumns: 0:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [1, 2]
+                            valueColumns: 1:string, 2:bigint
                         Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string), _col2 (type: bigint)
             Execution mode: vectorized, llap
@@ -363,11 +362,11 @@ STAGE PLANS:
                         Map-reduce partition columns: _col1 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [1, 2]
+                            keyColumns: 1:string, 2:bigint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [1]
-                            valueColumnNums: [0]
+                            partitionColumns: 1:string
+                            valueColumns: 0:int
                         Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int)
         Reducer 3
@@ -540,10 +539,10 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkMultiKeyOperator
-                            keyColumnNums: [0, 1]
+                            keyColumns: 0:string, 1:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [2, 3, 4, 5]
+                            valueColumns: 2:double, 3:double, 4:int, 5:double
                         Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: double)
             Execution mode: vectorized, llap
@@ -599,11 +598,11 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 2]
+                            keyColumns: 0:string, 2:double
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: [1, 3, 4, 5]
+                            partitionColumns: 0:string
+                            valueColumns: 1:string, 3:double, 4:int, 5:double
                         Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double)
         Reducer 3
@@ -678,12 +677,12 @@ STAGE PLANS:
                         Map-reduce partition columns: lower(_col1) (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 3]
+                            keyColumns: 7:string, 3:double
                             keyExpressions: StringLower(col 2:string) -> 7:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [8]
-                            valueColumnNums: [6, 2, 4, 5]
+                            partitionColumns: 8:string
+                            valueColumns: 6:int, 2:string, 4:int, 5:double
                         Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: rank_window_0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: double)
         Reducer 4
@@ -758,11 +757,11 @@ STAGE PLANS:
                         Map-reduce partition columns: _col5 (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [4, 5]
+                            keyColumns: 4:int, 5:double
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [4]
-                            valueColumnNums: [6, 2]
+                            partitionColumns: 4:int
+                            valueColumns: 6:int, 2:int
                         Statistics: Num rows: 10 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: dense_rank_window_1 (type: int), _col0 (type: int)
         Reducer 5
@@ -899,10 +898,10 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
-                            keyColumnNums: [1]
+                            keyColumns: 1:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [2, 4]
+                            valueColumns: 2:int, 4:boolean
                         Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: int), _col2 (type: boolean)
             Execution mode: vectorized, llap
@@ -952,10 +951,10 @@ STAGE PLANS:
                         Map-reduce partition columns: _col1 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
-                            keyColumnNums: [6]
+                            keyColumns: 6:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [2]
+                            valueColumns: 2:int
                         Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int)
             Execution mode: vectorized, llap
@@ -998,6 +997,9 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: boolean)
                         Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                MergeJoin Vectorization:
+                    enabled: false
+                    enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -1042,12 +1044,12 @@ STAGE PLANS:
                         Map-reduce partition columns: 0 (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [3, 6]
+                            keyColumns: 3:int, 6:double
                             keyExpressions: ConstantVectorExpression(val 0) -> 3:int, DoubleColDivideDoubleColumn(col 4:double, col 5:double)(children: CastLongToDouble(col 1:bigint) -> 4:double, CastLongToDouble(col 2:bigint) -> 5:double) -> 6:double
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: [1, 2]
+                            partitionColumns: 7:int
+                            valueColumns: 1:bigint, 2:bigint
                         Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 4
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
index 157d184..08ed8d8 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
@@ -81,11 +81,11 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 1]
+                            keyColumns: 7:string, 1:smallint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: [3]
+                            partitionColumns: 7:string
+                            valueColumns: 3:bigint
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
                         value expressions: b (type: bigint)
             Execution mode: vectorized, llap
@@ -10256,11 +10256,11 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 9]
+                            keyColumns: 7:string, 9:decimal(4,2)
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: [3, 8]
+                            partitionColumns: 7:string
+                            valueColumns: 3:bigint, 8:timestamp
                         Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                         value expressions: b (type: bigint), ts (type: timestamp)
             Execution mode: vectorized, llap
@@ -10352,11 +10352,11 @@ STAGE PLANS:
                         Map-reduce partition columns: _col7 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 3]
+                            keyColumns: 0:string, 3:timestamp
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: [4, 2]
+                            partitionColumns: 0:string
+                            valueColumns: 4:int, 2:bigint
                         Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                         value expressions: rank_window_0 (type: int), _col3 (type: bigint)
         Reducer 3
@@ -10534,10 +10534,10 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
-                            keyColumnNums: [7]
+                            keyColumns: 7:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [1, 2, 4]
+                            valueColumns: 1:smallint, 2:int, 4:float
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
                         value expressions: si (type: smallint), i (type: int), f (type: float)
             Execution mode: vectorized, llap
@@ -10627,10 +10627,10 @@ STAGE PLANS:
                         Map-reduce partition columns: _col1 (type: smallint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkLongOperator
-                            keyColumnNums: [1]
+                            keyColumns: 1:smallint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [4, 3, 0]
+                            valueColumns: 4:bigint, 3:float, 0:string
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
                         value expressions: sum_window_0 (type: bigint), _col4 (type: float), _col7 (type: string)
         Reducer 3
@@ -10801,11 +10801,11 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 6]
+                            keyColumns: 7:string, 6:boolean
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: [1, 10]
+                            partitionColumns: 7:string
+                            valueColumns: 1:smallint, 10:binary
                         Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
                         value expressions: si (type: smallint), bin (type: binary)
             Execution mode: vectorized, llap
@@ -10897,11 +10897,11 @@ STAGE PLANS:
                         Map-reduce partition columns: _col1 (type: smallint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 3]
+                            keyColumns: 2:smallint, 3:binary
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: [4, 0]
+                            partitionColumns: 2:smallint
+                            valueColumns: 4:int, 0:string
                         Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
                         value expressions: rank_window_0 (type: int), _col7 (type: string)
         Reducer 3
@@ -11074,10 +11074,10 @@ STAGE PLANS:
                         Map-reduce partition columns: i (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkLongOperator
-                            keyColumnNums: [2]
+                            keyColumns: 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [4, 7]
+                            valueColumns: 4:float, 7:string
                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
                         value expressions: f (type: float), s (type: string)
             Execution mode: vectorized, llap
@@ -11167,12 +11167,12 @@ STAGE PLANS:
                         Map-reduce partition columns: 0 (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [4, 1]
+                            keyColumns: 4:int, 1:float
                             keyExpressions: ConstantVectorExpression(val 0) -> 4:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [5]
-                            valueColumnNums: [3, 2]
+                            partitionColumns: 5:int
+                            valueColumns: 3:double, 2:string
                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
                         value expressions: sum_window_0 (type: double), _col7 (type: string)
         Reducer 3
@@ -11354,11 +11354,11 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 9]
+                            keyColumns: 7:string, 9:decimal(4,2)
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: [1, 4]
+                            partitionColumns: 7:string
+                            valueColumns: 1:smallint, 4:float
                         Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE
                         value expressions: si (type: smallint), f (type: float)
             Execution mode: vectorized, llap
@@ -11450,11 +11450,11 @@ STAGE PLANS:
                         Map-reduce partition columns: _col1 (type: smallint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 3]
+                            keyColumns: 2:smallint, 3:float
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: [4, 0]
+                            partitionColumns: 2:smallint
+                            valueColumns: 4:int, 0:string
                         Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE
                         value expressions: rank_window_0 (type: int), _col7 (type: string)
         Reducer 3
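The plans above also surface four different reduce-sink classes: VectorReduceSinkLongOperator and VectorReduceSinkStringOperator for a single long-family or string key, VectorReduceSinkMultiKeyOperator for other key shapes, and VectorReduceSinkObjectHashOperator where rows are routed by partition columns that differ from the sort keys (the PTF pattern, where the plan prints a separate partitionColumns line). The typed keyColumns line now makes that choice legible at a glance. A rough, assumption-laden Java sketch of such a selection follows; it is not Hive's actual decision code, and needsObjectHash is a stand-in for the real, richer criteria:

    public class SinkClassChooser {

      // needsObjectHash approximates "partition columns differ from key columns",
      // as in the PTF plans above; Hive's real checks consider more conditions.
      static String chooseSinkClass(String[] keyTypes, boolean needsObjectHash) {
        if (needsObjectHash) {
          return "VectorReduceSinkObjectHashOperator";
        }
        if (keyTypes.length == 1) {
          switch (keyTypes[0]) {
            case "tinyint": case "smallint": case "int": case "bigint":
              return "VectorReduceSinkLongOperator";
            case "string":
              return "VectorReduceSinkStringOperator";
            default:
              break;
          }
        }
        return "VectorReduceSinkMultiKeyOperator";
      }

      public static void main(String[] args) {
        System.out.println(chooseSinkClass(new String[] {"smallint"}, false));         // Long
        System.out.println(chooseSinkClass(new String[] {"string", "string"}, false)); // MultiKey
        System.out.println(chooseSinkClass(new String[] {"string"}, true));            // ObjectHash
      }
    }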
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
index 1871216..5fb7367 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
@@ -89,11 +89,10 @@ STAGE PLANS:
                         Map-reduce partition columns: 0 (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkLongOperator
-                            keyColumnNums: [3]
+                            keyColumns: 3:int
                             keyExpressions: ConstantVectorExpression(val 0) -> 3:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
                         Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -233,11 +232,11 @@ STAGE PLANS:
                         Map-reduce partition columns: d (type: double)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [5, 9]
+                            keyColumns: 5:double, 9:decimal(4,2)
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [5]
-                            valueColumnNums: [7]
+                            partitionColumns: 5:double
+                            valueColumns: 7:string
                         Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE
                         value expressions: s (type: string)
             Execution mode: vectorized, llap
@@ -491,11 +490,11 @@ STAGE PLANS:
                         Map-reduce partition columns: bin (type: binary)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [10, 5, 2]
+                            keyColumns: 10:binary, 5:double, 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [10]
-                            valueColumnNums: [7]
+                            partitionColumns: 10:binary
+                            valueColumns: 7:string
                         Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE
                         value expressions: s (type: string)
             Execution mode: vectorized, llap
@@ -715,11 +714,10 @@ STAGE PLANS:
                         Map-reduce partition columns: i (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 7, 9]
+                            keyColumns: 2:int, 7:string, 9:decimal(4,2)
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: []
+                            partitionColumns: 2:int
                         Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -938,11 +936,11 @@ STAGE PLANS:
                         Map-reduce partition columns: d (type: double)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [5, 4]
+                            keyColumns: 5:double, 4:float
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [5]
-                            valueColumnNums: [0, 7]
+                            partitionColumns: 5:double
+                            valueColumns: 0:tinyint, 7:string
                         Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
                         value expressions: t (type: tinyint), s (type: string)
             Execution mode: vectorized, llap
@@ -1196,11 +1194,10 @@ STAGE PLANS:
                         Map-reduce partition columns: bo (type: boolean)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [6, 7]
+                            keyColumns: 6:boolean, 7:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [6]
-                            valueColumnNums: []
+                            partitionColumns: 6:boolean
                         Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -1428,12 +1425,12 @@ STAGE PLANS:
                         Map-reduce partition columns: UDFToByte(10) (type: tinyint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [12, 7]
+                            keyColumns: 12:tinyint, 7:string
                             keyExpressions: ConstantVectorExpression(val 10) -> 12:bigint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [13]
-                            valueColumnNums: [2]
+                            partitionColumns: 13:tinyint
+                            valueColumns: 2:int
                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
                         value expressions: i (type: int)
             Execution mode: vectorized, llap
@@ -1622,11 +1619,10 @@ STAGE PLANS:
                         Map-reduce partition columns: a (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
+                            keyColumns: 0:int, 1:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
+                            partitionColumns: 0:int
                         Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -1794,11 +1790,10 @@ STAGE PLANS:
                         Map-reduce partition columns: a (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
+                            keyColumns: 0:int, 1:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
+                            partitionColumns: 0:int
                         Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -1966,11 +1961,10 @@ STAGE PLANS:
                         Map-reduce partition columns: a (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
+                            keyColumns: 0:int, 1:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
+                            partitionColumns: 0:int
                         Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -2138,11 +2132,10 @@ STAGE PLANS:
                         Map-reduce partition columns: a (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 1]
+                            keyColumns: 0:int, 1:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
+                            partitionColumns: 0:int
                         Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out
index 7b6fa66..4131941 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out
@@ -89,11 +89,10 @@ STAGE PLANS:
                         Map-reduce partition columns: i (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 7, 3]
+                            keyColumns: 2:int, 7:string, 3:bigint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: []
+                            partitionColumns: 2:int
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -256,11 +255,10 @@ STAGE PLANS:
                         Map-reduce partition columns: d (type: double)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [5, 7, 4]
+                            keyColumns: 5:double, 7:string, 4:float
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [5]
-                            valueColumnNums: []
+                            partitionColumns: 5:double
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -423,11 +421,11 @@ STAGE PLANS:
                         Map-reduce partition columns: ts (type: timestamp)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [8, 4]
+                            keyColumns: 8:timestamp, 4:float
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [8]
-                            valueColumnNums: [7]
+                            partitionColumns: 8:timestamp
+                            valueColumns: 7:string
                         Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
                         value expressions: s (type: string)
             Execution mode: vectorized, llap
@@ -556,11 +554,10 @@ STAGE PLANS:
                         Map-reduce partition columns: t (type: tinyint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0, 7, 5]
+                            keyColumns: 0:tinyint, 7:string, 5:double
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
+                            partitionColumns: 0:tinyint
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -688,11 +685,11 @@ STAGE PLANS:
                         Map-reduce partition columns: ts (type: timestamp)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [8, 7]
+                            keyColumns: 8:timestamp, 7:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [8]
-                            valueColumnNums: [2]
+                            partitionColumns: 8:timestamp
+                            valueColumns: 2:int
                         Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
                         value expressions: i (type: int)
             Execution mode: vectorized, llap
@@ -857,11 +854,11 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 2]
+                            keyColumns: 7:string, 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: [5]
+                            partitionColumns: 7:string
+                            valueColumns: 5:double
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
                         value expressions: d (type: double)
             Execution mode: vectorized, llap
@@ -1021,11 +1018,11 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 2]
+                            keyColumns: 7:string, 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: [5]
+                            partitionColumns: 7:string
+                            valueColumns: 5:double
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
                         value expressions: d (type: double)
             Execution mode: vectorized, llap
@@ -1185,11 +1182,11 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 2]
+                            keyColumns: 7:string, 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: [5]
+                            partitionColumns: 7:string
+                            valueColumns: 5:double
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
                         value expressions: d (type: double)
             Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out
index 782bd9b..e873936 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out
@@ -81,11 +81,11 @@ STAGE PLANS:
                         Map-reduce partition columns: si (type: smallint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [1, 2, 3]
+                            keyColumns: 1:smallint, 2:int, 3:bigint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [1]
-                            valueColumnNums: [0]
+                            partitionColumns: 1:smallint
+                            valueColumns: 0:tinyint
                         Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
                         value expressions: t (type: tinyint)
             Execution mode: vectorized, llap
@@ -339,11 +339,10 @@ STAGE PLANS:
                         Map-reduce partition columns: si (type: smallint), bo (type: boolean)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [1, 6, 2, 4]
+                            keyColumns: 1:smallint, 6:boolean, 2:int, 4:float
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [1, 6]
-                            valueColumnNums: []
+                            partitionColumns: 1:smallint, 6:boolean
                         Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -561,11 +560,10 @@ STAGE PLANS:
                         Map-reduce partition columns: si (type: smallint), bo (type: boolean)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [1, 6, 2, 4]
+                            keyColumns: 1:smallint, 6:boolean, 2:int, 4:float
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [1, 6]
-                            valueColumnNums: []
+                            partitionColumns: 1:smallint, 6:boolean
                         Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -783,10 +781,10 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
-                            keyColumnNums: [7]
+                            keyColumns: 7:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [1, 2]
+                            valueColumns: 1:smallint, 2:int
                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
                         value expressions: si (type: smallint), i (type: int)
             Execution mode: vectorized, llap
@@ -10932,11 +10930,10 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 1, 2]
+                            keyColumns: 7:string, 1:smallint, 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: []
+                            partitionColumns: 7:string
                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -11189,11 +11186,10 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 1, 2]
+                            keyColumns: 7:string, 1:smallint, 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: []
+                            partitionColumns: 7:string
                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -11446,11 +11442,10 @@ STAGE PLANS:
                         Map-reduce partition columns: s (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [7, 1, 2]
+                            keyColumns: 7:string, 1:smallint, 2:int
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [7]
-                            valueColumnNums: []
+                            partitionColumns: 7:string
                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -11703,11 +11698,10 @@ STAGE PLANS:
                         Map-reduce partition columns: si (type: smallint), bo (type: boolean)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [1, 6, 2, 4]
+                            keyColumns: 1:smallint, 6:boolean, 2:int, 4:float
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [1, 6]
-                            valueColumnNums: []
+                            partitionColumns: 1:smallint, 6:boolean
                         Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -11960,11 +11954,10 @@ STAGE PLANS:
                         Map-reduce partition columns: i (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 6, 3]
+                            keyColumns: 2:int, 6:boolean, 3:bigint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: []
+                            partitionColumns: 2:int
                         Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -12183,12 +12176,12 @@ STAGE PLANS:
                         Map-reduce partition columns: i (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 12]
+                            keyColumns: 2:int, 12:char(12)
                             keyExpressions: CastStringGroupToChar(col 7:string, maxLength 12) -> 12:char(12)
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: [7]
+                            partitionColumns: 2:int
+                            valueColumns: 7:string
                         Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                         value expressions: s (type: string)
             Execution mode: vectorized, llap
@@ -12408,12 +12401,12 @@ STAGE PLANS:
                         Map-reduce partition columns: i (type: int)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [2, 12]
+                            keyColumns: 2:int, 12:varchar(12)
                             keyExpressions: CastStringGroupToVarChar(col 7:string, maxLength 12) -> 12:varchar(12)
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [2]
-                            valueColumnNums: [7]
+                            partitionColumns: 2:int
+                            valueColumns: 7:string
                         Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                         value expressions: s (type: string)
             Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
index d2670af..b246b6e 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
@@ -81,11 +81,11 @@ STAGE PLANS:
                         Map-reduce partition columns: f (type: float)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [4, 0]
+                            keyColumns: 4:float, 0:tinyint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [4]
-                            valueColumnNums: [7]
+                            partitionColumns: 4:float
+                            valueColumns: 7:string
                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
                         value expressions: s (type: string)
             Execution mode: vectorized, llap
@@ -340,11 +340,10 @@ STAGE PLANS:
                         Map-reduce partition columns: ts (type: timestamp)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [8, 2, 7]
+                            keyColumns: 8:timestamp, 2:int, 7:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [8]
-                            valueColumnNums: []
+                            partitionColumns: 8:timestamp
                         Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -598,11 +597,10 @@ STAGE PLANS:
                         Map-reduce partition columns: bo (type: boolean)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [6, 3, 7]
+                            keyColumns: 6:boolean, 3:bigint, 7:string
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [6]
-                            valueColumnNums: []
+                            partitionColumns: 6:boolean
                         Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -821,11 +819,11 @@ STAGE PLANS:
                         Map-reduce partition columns: dec (type: decimal(4,2))
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [9, 4]
+                            keyColumns: 9:decimal(4,2), 4:float
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [9]
-                            valueColumnNums: [7]
+                            partitionColumns: 9:decimal(4,2)
+                            valueColumns: 7:string
                         Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE
                         value expressions: s (type: string)
             Execution mode: vectorized, llap
@@ -1082,10 +1080,10 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: bigint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkLongOperator
-                            keyColumnNums: [3]
+                            keyColumns: 3:bigint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: [8, 9]
+                            valueColumns: 8:timestamp, 9:decimal(4,2)
                         Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: timestamp), _col2 (type: decimal(4,2))
             Execution mode: vectorized, llap
@@ -1135,10 +1133,9 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: bigint)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkLongOperator
-                            keyColumnNums: [3]
+                            keyColumns: 3:bigint
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumnNums: []
                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -1173,6 +1170,9 @@ STAGE PLANS:
                         sort order: ++
                         Map-reduce partition columns: _col1 (type: timestamp)
                         Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+                MergeJoin Vectorization:
+                    enabled: false
+                    enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
         Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1475,6 +1474,9 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: timestamp) Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(4,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1686,10 +1688,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [8, 9] + valueColumns: 8:timestamp, 9:decimal(4,2) Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: timestamp), _col3 (type: decimal(4,2)) Execution mode: vectorized, llap @@ -1739,10 +1741,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1777,6 +1778,9 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col2 (type: timestamp) Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out index cf8f348..b682a29 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out @@ -83,11 +83,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [] + 
partitionColumns: 2:string Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out index 584453c..48ef43b 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out @@ -81,11 +81,10 @@ STAGE PLANS: Map-reduce partition columns: i (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 7, 3] + keyColumns: 2:int, 7:string, 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [] + partitionColumns: 2:int Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -338,11 +337,10 @@ STAGE PLANS: Map-reduce partition columns: d (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5, 7, 4] + keyColumns: 5:double, 7:string, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [] + partitionColumns: 5:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -595,11 +593,11 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 4] + keyColumns: 8:timestamp, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [7] + partitionColumns: 8:timestamp + valueColumns: 7:string Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -818,11 +816,10 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 7, 4] + keyColumns: 8:timestamp, 7:string, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [] + partitionColumns: 8:timestamp Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1040,11 +1037,10 @@ STAGE PLANS: Map-reduce partition columns: t (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 7, 5] + keyColumns: 
0:tinyint, 7:string, 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:tinyint Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1262,11 +1258,11 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 7] + keyColumns: 8:timestamp, 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [2] + partitionColumns: 8:timestamp + valueColumns: 2:int Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE value expressions: i (type: int) Execution mode: vectorized, llap @@ -1520,11 +1516,10 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 4] + keyColumns: 8:timestamp, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [] + partitionColumns: 8:timestamp Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1777,11 +1772,10 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 4] + keyColumns: 8:timestamp, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [] + partitionColumns: 8:timestamp Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1999,11 +1993,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 2] + keyColumns: 7:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [5] + partitionColumns: 7:string + valueColumns: 5:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: d (type: double) Execution mode: vectorized, llap @@ -2165,11 +2159,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 2] + 
keyColumns: 7:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [5] + partitionColumns: 7:string + valueColumns: 5:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: d (type: double) Execution mode: vectorized, llap @@ -2331,11 +2325,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 2] + keyColumns: 7:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [5] + partitionColumns: 7:string + valueColumns: 5:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: d (type: double) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out index 78df440..3ee7ecd 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out @@ -80,11 +80,10 @@ STAGE PLANS: Map-reduce partition columns: type (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [] + partitionColumns: 1:string Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out index c710096..5b3c240 100644 --- ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -63,10 +63,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap @@ -119,10 +118,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -244,10 +243,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -300,10 +298,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -575,10 +572,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap @@ -631,10 +627,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -756,10 +752,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -812,10 +807,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -1087,10 +1081,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:float, 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap @@ -1143,10 +1136,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -1268,10 +1261,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Execution mode: vectorized, llap @@ -1324,10 +1316,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -1646,10 +1637,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6] + valueColumns: 0:bigint, 1:bigint, 2:double, 3:double, 4:bigint, 5:double, 6:tinyint Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: tinyint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_1.q.out ql/src/test/results/clientpositive/llap/vectorization_1.q.out index d2fcbc2..a0b9d23 100644 --- ql/src/test/results/clientpositive/llap/vectorization_1.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_1.q.out @@ -97,10 +97,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 0:double, 1:double, 2:bigint, 3:double, 4:tinyint, 5:int, 6:double, 7:double, 8:bigint, 9:bigint Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: tinyint), _col5 (type: int), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_12.q.out ql/src/test/results/clientpositive/llap/vectorization_12.q.out index b90a7fe..d45ef6a 100644 --- ql/src/test/results/clientpositive/llap/vectorization_12.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_12.q.out @@ -124,10 +124,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:double, 1:bigint, 2:string, 3:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8, 9, 10] + valueColumns: 4:bigint, 5:double, 6:double, 7:double, 8:bigint, 9:bigint, 10:double Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized, llap @@ -191,10 +191,10 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:bigint, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 11, 12, 4, 13, 14, 
19, 15, 20, 22, 24, 9, 26, 25, 21, 27] + valueColumns: 3:boolean, 11:double, 12:bigint, 4:bigint, 13:bigint, 14:double, 19:double, 15:double, 20:double, 22:double, 24:decimal(22,2), 9:bigint, 26:double, 25:double, 21:double, 27:double Statistics: Num rows: 1 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out index efcf292..28b4c5e 100644 --- ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -126,10 +126,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + valueColumns: 5:tinyint, 6:double, 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:bigint, 13:float, 14:tinyint Statistics: Num rows: 2730 Data size: 510974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized, llap @@ -193,10 +193,9 @@ STAGE PLANS: sort order: +++++++++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string, 15:tinyint, 5:tinyint, 17:tinyint, 6:double, 20:double, 19:double, 21:float, 22:double, 23:double, 24:double, 27:decimal(7,3), 28:double, 25:double, 13:float, 31:double, 14:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out index ac63435..74daac3 100644 --- ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -126,10 +126,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) 
Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:string, 1:float, 2:double, 3:timestamp, 4:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10, 11] + valueColumns: 5:double, 6:double, 7:bigint, 8:float, 9:double, 10:double, 11:bigint Statistics: Num rows: 303 Data size: 52846 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized, llap @@ -193,10 +193,10 @@ STAGE PLANS: sort order: ++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:float, 2:double, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] + valueColumns: 4:boolean, 12:double, 14:double, 13:double, 15:float, 8:float, 19:float, 20:float, 21:double, 22:double, 11:bigint, 24:double, 25:double, 23:double, 29:double, 28:double, 31:double, 34:double Statistics: Num rows: 151 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out index db49ad2..441097a 100644 --- ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -122,10 +122,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4, 5, 6] + keyColumns: 0:float, 1:boolean, 2:double, 3:string, 4:tinyint, 5:int, 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + valueColumns: 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:double, 13:bigint, 14:double, 15:double, 16:bigint Statistics: Num rows: 6144 Data size: 1278652 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: 
double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out index 5f7c8c2..d80c750 100644 --- ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -99,10 +99,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:double, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6] + valueColumns: 3:bigint, 4:double, 5:double, 6:double Statistics: Num rows: 2048 Data size: 303516 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_17.q.out ql/src/test/results/clientpositive/llap/vectorization_17.q.out index ef47725..0122365 100644 --- ql/src/test/results/clientpositive/llap/vectorization_17.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_17.q.out @@ -92,10 +92,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 4] + keyColumns: 3:bigint, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] + valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 14:double, 17:double, 19:double, 20:double, 22:decimal(11,4), 18:double Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_2.q.out ql/src/test/results/clientpositive/llap/vectorization_2.q.out index 32b62ba..6a0a81b 100644 --- ql/src/test/results/clientpositive/llap/vectorization_2.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_2.q.out @@ -101,10 +101,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 0:bigint, 1:bigint, 2:double, 3:double, 4:double, 5:bigint, 6:bigint, 7:tinyint, 8:double, 9:bigint Statistics: Num rows: 
1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_3.q.out ql/src/test/results/clientpositive/llap/vectorization_3.q.out index 944c0f7..1e120f4 100644 --- ql/src/test/results/clientpositive/llap/vectorization_3.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_3.q.out @@ -106,10 +106,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + valueColumns: 0:double, 1:double, 2:bigint, 3:double, 4:double, 5:bigint, 6:double, 7:double, 8:bigint, 9:double, 10:bigint, 11:bigint, 12:double, 13:double Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_4.q.out ql/src/test/results/clientpositive/llap/vectorization_4.q.out index 9eabe7e..c34d8a9 100644 --- ql/src/test/results/clientpositive/llap/vectorization_4.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_4.q.out @@ -101,10 +101,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:bigint, 1:double, 2:double, 3:bigint, 4:tinyint Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_5.q.out ql/src/test/results/clientpositive/llap/vectorization_5.q.out index 15ee8e0..9b82b34 100644 --- ql/src/test/results/clientpositive/llap/vectorization_5.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_5.q.out @@ -94,10 +94,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:smallint, 1:bigint, 2:smallint, 3:bigint, 4:tinyint 
Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_7.q.out ql/src/test/results/clientpositive/llap/vectorization_7.q.out index 008068e..eea43b1 100644 --- ql/src/test/results/clientpositive/llap/vectorization_7.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_7.q.out @@ -98,10 +98,9 @@ STAGE PLANS: sort order: +++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23] + keyColumns: 10:boolean, 3:bigint, 1:smallint, 0:tinyint, 8:timestamp, 6:string, 14:bigint, 15:int, 16:smallint, 17:tinyint, 19:int, 20:bigint, 18:int, 21:tinyint, 23:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_8.q.out ql/src/test/results/clientpositive/llap/vectorization_8.q.out index de95bbb..1aa3bdf 100644 --- ql/src/test/results/clientpositive/llap/vectorization_8.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_8.q.out @@ -94,10 +94,9 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22] + keyColumns: 8:timestamp, 5:double, 10:boolean, 6:string, 4:float, 13:double, 14:double, 15:double, 17:float, 19:double, 16:double, 18:float, 20:float, 22:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out index 5f7c8c2..d80c750 100644 --- ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -99,10 +99,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:double, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6] + valueColumns: 3:bigint, 4:double, 5:double, 6:double Statistics: Num rows: 2048 Data size: 303516 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 
(type: double), _col6 (type: double) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 3d5bea1..04f7386 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -147,10 +147,9 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 5, 1] + keyColumns: 0:tinyint, 5:double, 1:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap @@ -305,10 +304,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:double, 2:bigint Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap @@ -372,10 +371,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 3] + keyColumns: 0:tinyint, 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 @@ -518,11 +516,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:tinyint Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap @@ -678,11 +675,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:tinyint, 1:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + 
partitionColumns: 0:tinyint Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -748,10 +744,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:tinyint, 1:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 @@ -916,10 +911,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 4586 Data size: 64088 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap @@ -974,10 +969,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:bigint, 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4586 Data size: 64088 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out index 1a87d1d..a3e1b2c 100644 --- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out @@ -53,10 +53,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out index d142f09..5f50131 100644 --- ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out @@ -150,6 +150,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap + MergeJoin Vectorization: + enabled: false + 
enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -227,6 +230,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -304,6 +310,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index 2591c28..7e78396 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -317,10 +317,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -461,10 +460,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index 7e78360..826fc5f 100644 --- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -82,10 +82,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -216,10 +215,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6105 Data size: 18232 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -291,10 +289,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:bigint, 1:bigint, 2:double, 3:double Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index da71d06..68ffb3c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -328,6 +328,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -470,6 +473,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -684,6 +690,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2200 Data size: 809600 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -703,6 +712,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -879,6 +891,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2200 Data size: 809600 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -898,6 +913,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -1084,6 +1102,9 @@ STAGE PLANS: 
sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1226,6 +1247,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1394,6 +1418,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1536,6 +1563,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1702,6 +1732,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1859,6 +1892,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2001,6 +2037,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2143,6 +2182,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2313,6 +2355,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2447,6 +2492,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2618,6 +2666,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + 
MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2790,6 +2841,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2947,6 +3001,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3084,6 +3141,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3221,6 +3281,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3403,6 +3466,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1100 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3422,6 +3488,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -3595,6 +3664,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3614,6 +3686,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -3783,6 +3858,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -4042,6 +4120,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false 
Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -4404,6 +4485,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: @@ -5612,6 +5696,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index 2aa9af2..c22a127 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -180,6 +180,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -414,6 +417,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -648,6 +654,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -956,6 +965,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1248,6 +1260,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1511,6 +1526,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index e416954..946f37e 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ 
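In the hunks around this file, the detail output also replaces the bare index lists keyColumnNums/valueColumnNums with typed keyColumns/valueColumns entries (e.g. "1:decimal(10,1)" instead of "[1]"), so each reduce-sink column now carries its type. A hedged sketch, using hypothetical tables t_big/t_small keyed on a decimal(10,1) column, of a query whose plan would carry such an annotation:

  SET hive.vectorized.execution.enabled=true;
  SET hive.vectorized.execution.reducesink.new.enabled=true;
  EXPLAIN VECTORIZATION DETAIL
  SELECT count(*)
  FROM t_big b
  JOIN t_small s ON b.dec_col = s.dec_col;  -- dec_col decimal(10,1); table and column names are hypothetical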
ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -279,10 +279,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(10,1)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [1] + keyColumns: 1:decimal(10,1) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -331,10 +330,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(10,1)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [1] + keyColumns: 1:decimal(10,1) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: decimal(10,1)) @@ -360,10 +358,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:decimal(10,1), 1:decimal(10,1), 2:binary Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,1)), _col1 (type: decimal(10,1)), _col2 (type: binary) Execution mode: vectorized, llap @@ -402,6 +399,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -472,10 +472,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:decimal(10,1), 1:decimal(10,1), 2:binary Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,1)), _col1 (type: decimal(10,1)), _col2 (type: binary) diff --git ql/src/test/results/clientpositive/llap/vectorized_join46.q.out ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index 6b25672..1e83f7b 100644 --- ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -19,6 +19,7 @@ POSTHOOK: Output: default@test1_n14 POSTHOOK: Lineage: test1_n14.col_1 SCRIPT [] POSTHOOK: Lineage: test1_n14.key SCRIPT [] POSTHOOK: 
Lineage: test1_n14.value SCRIPT [] +col1 col2 col3 PREHOOK: query: CREATE TABLE test2_n9 (key INT, value INT, col_2 STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -40,16 +41,22 @@ POSTHOOK: Output: default@test2_n9 POSTHOOK: Lineage: test2_n9.col_2 SCRIPT [] POSTHOOK: Lineage: test2_n9.key SCRIPT [] POSTHOOK: Lineage: test2_n9.value SCRIPT [] -PREHOOK: query: EXPLAIN +col1 col2 col3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,9 +74,16 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -77,12 +91,26 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:string + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 5 + projectedOutput: 0:int, 1:int, 2:string, 4:int, 5:int, 6:string + smallTableValueMapping: 4:int, 6:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -90,23 +118,66 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 
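The Map Join Vectorization block above lists every condition the native vectorized outer join checks: one map-join condition, no null-safe equality, a keyed outer join, and the optimized hash table implementation. The plan comes from a session along these lines, using the tables this test defines; flipping either setting to false should demote the operator from the native VectorMapJoinOuterLongOperator path (sketch):

  SET hive.vectorized.execution.enabled=true;
  SET hive.mapjoin.optimized.hashtable=true;
  SET hive.vectorized.execution.mapjoin.native.enabled=true;
  EXPLAIN VECTORIZATION DETAIL
  SELECT *
  FROM test1_n14 LEFT OUTER JOIN test2_n9
  ON (test1_n14.value = test2_n9.value);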
Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -128,6 +199,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -136,20 +208,25 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND test1_n14.key between 100 and 102 AND test2_n9.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND test1_n14.key between 100 and 102 AND test2_n9.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -167,9 +244,16 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -180,12 +264,27 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 
(type: int) + Map Join Vectorization: + bigTableFilterExpressions: FilterLongColumnBetween(col 0:int, left 100, right 102) + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:string + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 5 + projectedOutput: 0:int, 1:int, 2:string, 4:int, 5:int, 6:string + smallTableValueMapping: 4:int, 6:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -193,27 +292,74 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2_n9 filterExpr: key BETWEEN 100 AND 102 (type: boolean) Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 0:int, left 100, right 102) predicate: key BETWEEN 100 AND 102 (type: boolean) Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no 
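Note how the residual BETWEEN predicate splits in the plan above: the small-table side is evaluated by a vectorized Filter Operator before the reduce sink, while the big-table side becomes a bigTableFilterExpressions entry (FilterLongColumnBetween) inside the map join itself. The same primitive backs an ordinary vectorized WHERE, as in this sketch against the test's big table (the exact predicate expression shown is an expectation, not quoted output):

  SET hive.vectorized.execution.enabled=true;
  EXPLAIN VECTORIZATION DETAIL
  SELECT *
  FROM test1_n14
  WHERE key BETWEEN 100 AND 102;  -- should compile to FilterLongColumnBetween(col 0:int, left 100, right 102)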
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -239,6 +385,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 98 NULL None NULL NULL NULL @@ -246,18 +393,23 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.key between 100 and 102 AND test2_n9.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.key between 100 and 102 AND test2_n9.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -275,9 +427,16 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -288,12 +447,21 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -301,25 +469,71 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2_n9 filterExpr: key BETWEEN 100 AND 102 (type: boolean) Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 0:int, left 100, right 102) predicate: key BETWEEN 100 AND 102 (type: boolean) Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -344,22 +558,28 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND true) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND true) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -377,26 +597,61 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 
Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -404,12 +659,26 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:string + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 5 + projectedOutput: 4:int, 5:int, 6:string, 0:int, 1:int, 2:string + smallTableValueMapping: 4:int, 6:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 0 Map 1 Statistics: Num rows: 8 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 8 Data size: 856 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
@@ -417,6 +686,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Stage: Stage-0 Fetch Operator @@ -438,6 +722,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 101 2 Car 102 2 Del 101 2 Car 103 2 Ema 99 2 Mat 102 2 Del @@ -445,16 +730,21 @@ POSTHOOK: Input: default@test2_n9 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -472,9 +762,16 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -485,12 +782,21 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -498,21 +804,63 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + 
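When the ON clause carries no equality at all, the join key lists are empty ("keys: 0 / 1"), so the cross-product warning fires and the one unmet native condition is "Outer Join has keys IS false": the operator falls back to the non-native VectorMapJoinOuterFilteredOperator, and the small table ships through a VectorReduceSinkEmptyKeyOperator. The triggering shape, repeated from this test:

  SET hive.vectorized.execution.enabled=true;
  EXPLAIN VECTORIZATION DETAIL
  SELECT *
  FROM test1_n14 LEFT OUTER JOIN test2_n9
  ON (test1_n14.key BETWEEN 100 AND 102);  -- no equality key: cross product plus residual filter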
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -535,6 +883,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -548,20 +897,25 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -603,21 +957,54 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -644,6 +1031,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -658,18 +1046,23 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -711,21 +1104,54 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -750,6 +1176,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -764,18 +1191,23 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
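A disjunction in the join condition makes the predicate non-equi, and here vectorization gives up on the whole big-table vertex rather than just the operator: the Map Vectorization summary reports notVectorizedReason "MAPJOIN operator: Non-equi joins not supported" and vectorized: false, while the small-table side still vectorizes its scan and reduce sink. The shape that triggers it, from this test:

  SET hive.vectorized.execution.enabled=true;
  EXPLAIN VECTORIZATION DETAIL
  SELECT *
  FROM test1_n14 LEFT OUTER JOIN test2_n9
  ON (test1_n14.value = test2_n9.value
      OR test1_n14.key BETWEEN 100 AND 102);  -- OR disjunction => non-equi join predicate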
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -817,21 +1249,54 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -856,6 +1321,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -864,20 +1330,25 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND (test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * 
FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND (test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -919,23 +1390,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -961,6 +1466,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -969,20 +1475,25 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat 102 2 Del NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
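By contrast, when an equality key survives and the BETWEEN disjunction is only a residual AND-ed onto it, the join still counts as non-equi for the MAPJOIN operator (the big-table vertex stays row-mode), but the small-table side now shuffles on the int key through a native keyed VectorReduceSinkLongOperator instead of an empty-key sink. The contrast query from this test:

  SET hive.vectorized.execution.enabled=true;
  EXPLAIN VECTORIZATION DETAIL
  SELECT *
  FROM test1_n14 LEFT OUTER JOIN test2_n9
  ON (test1_n14.value = test2_n9.value
      AND (test1_n14.key BETWEEN 100 AND 102
           OR test2_n9.key BETWEEN 100 AND 102));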
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1000,16 +1511,43 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1039,6 +1577,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1065,6 +1609,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1079,18 +1624,23 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1108,16 +1658,43 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1147,6 +1724,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1171,6 +1754,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1182,18 +1766,23 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1211,16 +1800,43 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1250,6 +1866,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1274,6 +1896,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -1284,20 +1907,25 @@ POSTHOOK: Input: default@test2_n9 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND (test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: 
query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 RIGHT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND (test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1315,18 +1943,46 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1356,6 +2012,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1381,26 +2043,32 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 101 2 Car 102 2 Del 101 2 Car 103 2 Ema 99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON 
(test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1418,37 +2086,91 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1462,6 +2184,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1488,6 +2213,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1502,18 +2228,23 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test1_n14.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1531,37 +2262,91 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 
0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1575,6 +2360,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1599,6 +2387,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1613,18 +2402,23 @@ POSTHOOK: Input: default@test2_n9 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value OR test2_n9.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1642,37 +2436,91 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan 
Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1686,6 +2534,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1710,6 +2561,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -1720,20 +2572,25 @@ POSTHOOK: Input: default@test2_n9 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND (test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1_n14 FULL OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value AND (test1_n14.key between 100 and 102 OR test2_n9.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1751,41 +2608,97 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) 
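-- A minimal sketch of the non-equi FULL OUTER pattern these plans exercise,
-- reusing the test1_n14(key, value, col_1) / test2_n9(key, value, col_2)
-- tables from this q.out: the OR'ed condition leaves no usable equality key,
-- so the join compiles as a cross product (hence the MERGEJOIN warning) with
-- the whole predicate applied as a residual filter, and the Merge Join
-- operator reports "Vectorizing MergeJoin Supported IS false" rather than
-- vectorizing.
EXPLAIN VECTORIZATION DETAIL
SELECT *
FROM test1_n14
FULL OUTER JOIN test2_n9
  ON (test1_n14.value = test2_n9.value
      OR test2_n9.key BETWEEN 100 AND 102);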
outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -1799,6 +2712,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1824,6 +2740,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n14 POSTHOOK: Input: default@test2_n9 #### A masked pattern was here #### +test1_n14.key test1_n14.value test1_n14.col_1 test2_n9.key test2_n9.value test2_n9.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 41ca076..0c751db 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -56,6 +56,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index 8efe78d..c9b9e81 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -146,14 +146,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - 
bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:decimal(8,1) bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:decimal(8,1) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -217,10 +219,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -281,10 +282,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -351,14 +351,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:decimal(8,1) bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:decimal(8,1) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -422,10 +424,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -486,10 
+487,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -556,14 +556,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:decimal(8,1) bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:decimal(8,1) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -627,10 +629,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -691,10 +692,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 748dea1..5527e64 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -155,11 +155,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 
5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -410,10 +410,10 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 5] + valueColumns: 1:string, 2:string, 5:int Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Execution mode: vectorized, llap @@ -455,10 +455,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -494,6 +493,9 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Vectorization: @@ -663,11 +665,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -822,11 +824,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1074,11 +1076,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink 
Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -1296,11 +1298,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -1520,11 +1522,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized, llap @@ -1566,10 +1568,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1641,6 +1642,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1745,10 +1749,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: 
true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1781,11 +1784,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized, llap @@ -1827,6 +1830,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Vectorization: @@ -2430,11 +2436,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2681,11 +2687,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3002,11 +3008,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + 
keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3224,11 +3230,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3270,10 +3276,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3344,6 +3349,9 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Vectorization: @@ -3518,11 +3526,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3756,11 +3764,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double) Execution mode: vectorized, llap @@ -4019,11 +4027,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4536,10 +4544,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 1:string, 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized, llap @@ -4885,10 +4893,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 1:string, 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized, llap @@ -5231,10 +5239,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5553,10 +5561,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5917,10 +5925,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -6250,10 +6258,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index 6421650..7142732 100644 --- ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -134,6 +134,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index aa4d888..384bf61 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -155,10 +155,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:timestamp, 1:timestamp Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: timestamp), _col1 (type: timestamp) Execution mode: 
vectorized, llap @@ -381,10 +380,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:double, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: bigint) Execution mode: vectorized, llap @@ -522,10 +520,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:double, 1:double, 2:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/mapjoin2.q.out ql/src/test/results/clientpositive/mapjoin2.q.out index 6b85e13..2288b4b 100644 --- ql/src/test/results/clientpositive/mapjoin2.q.out +++ ql/src/test/results/clientpositive/mapjoin2.q.out @@ -27,6 +27,85 @@ POSTHOOK: Output: default@tbl_n1 POSTHOOK: Lineage: tbl_n1.n SCRIPT [] POSTHOOK: Lineage: tbl_n1.t SCRIPT [] Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:tbl_n1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:tbl_n1 + TableScan + alias: tbl_n1 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: n (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 1L) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (n = 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + 
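-- A minimal sketch of the mapjoin2 pattern shown here, reusing the
-- tbl_n1(n BIGINT, t STRING) table from this q.out: the (WHERE 1 = 2) side is
-- folded into a constant-false filter, the join retains no key (the empty
-- keys 0/1 above), so it executes as a cross-product map join whose missing
-- side comes back as NULLs; on the two-row test data this yields the single
-- row "1  one  true  true" seen in the golden output below.
SELECT a.n, a.t, isnull(b.n), isnull(b.t)
FROM (SELECT * FROM tbl_n1 WHERE n = 1) a
LEFT OUTER JOIN (SELECT * FROM tbl_n1 WHERE 1 = 2) b
  ON a.n = b.n;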
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1L (type: bigint), _col0 (type: string), _col1 is null (type: boolean), _col2 is null (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -37,6 +116,91 @@ POSTHOOK: Input: default@tbl_n1 #### A masked pattern was here #### 1 one true true Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:tbl_n1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:tbl_n1 + TableScan + alias: tbl_n1 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: n (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 2L) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (n = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 is null (type: boolean), _col1 is null (type: boolean), 2L (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -47,6 +211,81 @@ POSTHOOK: Input: default@tbl_n1 #### A masked pattern was here #### true true 2 two Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl_n1 + filterExpr: (n = 1L) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (n = 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1L (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: string) + TableScan + alias: tbl_n1 + filterExpr: (n = 2L) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (n = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 2L (type: bigint), t (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 + 1 {false} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 is null (type: boolean), _col1 is null (type: boolean), _col2 is null (type: boolean), _col3 is null (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -58,6 +297,74 @@ POSTHOOK: Input: default@tbl_n1 false false true true true true false false Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:_dummy_table + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), 1 (type: int), 0 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -68,6 +375,77 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:_dummy_table + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), _col1 (type: int), 0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -78,6 +456,83 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:_dummy_table + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col2 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -87,6 +542,83 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 +Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:_dummy_table + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 11 (type: int), 1 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: int), _col2 (type: int), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/mapjoin46.q.out ql/src/test/results/clientpositive/mapjoin46.q.out index a4f067f..fce94a3 100644 --- ql/src/test/results/clientpositive/mapjoin46.q.out +++ ql/src/test/results/clientpositive/mapjoin46.q.out @@ -124,14 +124,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None NULL 
NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -234,12 +234,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -340,12 +340,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob 102 2 Del +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob 102 2 Del -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -430,10 +430,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product @@ -528,10 +528,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat NULL NULL NULL 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -540,6 +536,10 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -635,11 +635,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -648,6 +643,11 @@ NULL NULL None 102 2 Del 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -739,11 +739,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -752,6 +747,11 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -843,14 +843,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob 102 2 Del -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -944,13 +944,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1046,19 +1046,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del 100 1 Bob 102 2 Del -101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 103 2 Ema -101 2 Car 103 2 Ema 100 1 Bob 104 3 Fli -101 2 Car 104 3 Fli 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1150,16 +1150,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 100 1 Bob 102 2 Del -101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 103 2 Ema -101 2 Car 103 2 Ema 100 1 Bob 104 3 Fli -101 2 Car 104 3 Fli 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli 101 2 Car 105 NULL None +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1251,16 +1251,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del -100 1 Bob 102 2 Del -101 2 Car 102 2 Del 99 2 Mat 103 2 Ema -101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -1354,9 +1354,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product @@ -1407,7 +1407,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1447,31 +1447,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -101 2 Car 105 NULL None -101 2 Car 104 3 Fli -101 2 Car 103 2 Ema -101 2 Car 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli -100 1 Bob 103 2 Ema 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema -99 2 Mat 102 2 Del -99 0 Alice 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1506,12 +1508,12 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1531,7 +1533,8 @@ Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAP PREHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1_n4 PREHOOK: Input: default@test2_n2 @@ -1539,36 +1542,37 @@ PREHOOK: Input: default@test2_n2 POSTHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR 
test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -101 2 Car 105 NULL None -101 2 Car 104 3 Fli -101 2 Car 103 2 Ema -101 2 Car 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli -100 1 Bob 103 2 Ema 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del 99 2 Mat 102 2 Del -99 0 Alice NULL NULL NULL -98 NULL None NULL NULL NULL -NULL NULL None NULL NULL NULL +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1603,12 +1607,12 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1628,7 +1632,7 @@ Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAP PREHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1_n4 PREHOOK: Input: default@test2_n2 @@ -1636,34 +1640,36 @@ PREHOOK: Input: default@test2_n2 POSTHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -101 2 Car 103 2 Ema -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del -99 0 Alice 102 2 Del -98 NULL None 102 2 Del -NULL NULL None 102 2 Del -NULL NULL NULL 105 NULL None -NULL NULL NULL 104 3 Fli +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR 
test1_n4.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1681,11 +1687,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) TableScan alias: test2_n2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE @@ -1694,24 +1698,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1723,11 +1725,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1_n4 PREHOOK: Input: default@test2_n2 @@ -1735,37 +1737,426 @@ PREHOOK: Input: default@test2_n2 POSTHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None NULL NULL NULL -NULL NULL None NULL NULL NULL -NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema -101 2 Car 102 2 Del 99 2 Mat 102 2 Del -NULL NULL NULL 104 3 Fli -Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +99 2 Mat 103 2 Ema +NULL NULL None 
NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * -FROM ( - SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, - test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 - FROM test1_n4 RIGHT OUTER JOIN test2_n2 - ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) - ) sq1 -FULL OUTER JOIN ( - SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, - test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 - FROM test1_n4 LEFT OUTER JOIN test2_n2 - ON (test1_n4.value=test2_n2.value +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 
+POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 
Del +PREHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR 
test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + 
FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value AND (test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102)) ) sq2 @@ -1880,7 +2271,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1946,23 +2337,239 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema -NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del -NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL -NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del -NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL -NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL -NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL -NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL -NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del -NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL -101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, 
test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-2 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1_n4 + Fetch Operator + limit: -1 + $hdt$_2:$hdt$_3:test2_n2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1_n4 + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + $hdt$_2:$hdt$_3:test2_n2 + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### 101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL 
NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL diff --git ql/src/test/results/clientpositive/mapjoin47.q.out ql/src/test/results/clientpositive/mapjoin47.q.out index 172d160..c42094d 100644 --- ql/src/test/results/clientpositive/mapjoin47.q.out +++ ql/src/test/results/clientpositive/mapjoin47.q.out @@ -1415,7 +1415,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1521,7 +1521,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/mergejoin.q.out ql/src/test/results/clientpositive/mergejoin.q.out index 7cbcbbe..95b961f 100644 --- ql/src/test/results/clientpositive/mergejoin.q.out +++ ql/src/test/results/clientpositive/mergejoin.q.out @@ -1706,7 +1706,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/mergejoins_mixed.q.out ql/src/test/results/clientpositive/mergejoins_mixed.q.out index 9ac6d59..4d94085 100644 --- ql/src/test/results/clientpositive/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/mergejoins_mixed.q.out @@ -820,7 +820,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col3 (type: string) 1 _col0 (type: string) @@ -859,7 +859,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1161,7 +1161,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1463,7 +1463,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/optional_outer.q.out ql/src/test/results/clientpositive/optional_outer.q.out index 9ec1af7..efc952c 100644 --- ql/src/test/results/clientpositive/optional_outer.q.out +++ ql/src/test/results/clientpositive/optional_outer.q.out @@ -283,7 +283,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -344,7 +344,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/perf/spark/query51.q.out ql/src/test/results/clientpositive/perf/spark/query51.q.out index 21afbe2..1e5e4fd 100644 --- ql/src/test/results/clientpositive/perf/spark/query51.q.out +++ ql/src/test/results/clientpositive/perf/spark/query51.q.out @@ -271,7 +271,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: string) 1 _col0 (type: int), 
_col1 (type: string) diff --git ql/src/test/results/clientpositive/perf/spark/query97.q.out ql/src/test/results/clientpositive/perf/spark/query97.q.out index c4f4804..66f593b 100644 --- ql/src/test/results/clientpositive/perf/spark/query97.q.out +++ ql/src/test/results/clientpositive/perf/spark/query97.q.out @@ -201,7 +201,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out index 78f20cc..a8e8505 100644 --- ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out +++ ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out @@ -224,7 +224,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -299,7 +299,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/skewjoinopt3.q.out ql/src/test/results/clientpositive/skewjoinopt3.q.out index ef8af9f..9699002 100644 --- ql/src/test/results/clientpositive/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/skewjoinopt3.q.out @@ -247,7 +247,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -322,7 +322,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/smb_mapjoin_1.q.out index 1f334bd..75210a3 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_1.q.out @@ -272,7 +272,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -494,7 +494,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/smb_mapjoin_2.q.out index e39edcc..480b12e 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_2.q.out @@ -232,7 +232,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -458,7 +458,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git 
ql/src/test/results/clientpositive/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/smb_mapjoin_3.q.out index fa7968d..ba0a2ce 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_3.q.out @@ -231,7 +231,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -455,7 +455,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_46.q.out ql/src/test/results/clientpositive/smb_mapjoin_46.q.out index 1627af2..d9f14e7 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_46.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_46.q.out @@ -1319,7 +1319,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1418,7 +1418,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1515,7 +1515,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1614,7 +1614,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_47.q.out ql/src/test/results/clientpositive/smb_mapjoin_47.q.out index 9cc2de4..c596fc5 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_47.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_47.q.out @@ -1336,7 +1336,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -1442,7 +1442,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/smb_mapjoin_7.q.out index 83033b0..ef5cca6 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_7.q.out @@ -633,7 +633,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 667b9b3..6dd5a99 100644 --- ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -1012,7 +1012,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/auto_join14.q.out ql/src/test/results/clientpositive/spark/auto_join14.q.out index 42cee3e..ffa7342 100644 --- ql/src/test/results/clientpositive/spark/auto_join14.q.out +++ 
ql/src/test/results/clientpositive/spark/auto_join14.q.out @@ -129,4 +129,4 @@ POSTHOOK: query: SELECT sum(hash(dest1_n83.c1,dest1_n83.c2)) FROM dest1_n83 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1_n83 #### A masked pattern was here #### -404554174174 +404540072956 diff --git ql/src/test/results/clientpositive/spark/auto_join18.q.out ql/src/test/results/clientpositive/spark/auto_join18.q.out index 35a01e4..33d15f6 100644 --- ql/src/test/results/clientpositive/spark/auto_join18.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18.q.out @@ -99,7 +99,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out index def1de2..43f6ca5 100644 --- ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out @@ -101,7 +101,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join6.q.out ql/src/test/results/clientpositive/spark/auto_join6.q.out index b17ca86..f7c910e 100644 --- ql/src/test/results/clientpositive/spark/auto_join6.q.out +++ ql/src/test/results/clientpositive/spark/auto_join6.q.out @@ -92,7 +92,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join7.q.out ql/src/test/results/clientpositive/spark/auto_join7.q.out index aeec43e..cc67d2f 100644 --- ql/src/test/results/clientpositive/spark/auto_join7.q.out +++ ql/src/test/results/clientpositive/spark/auto_join7.q.out @@ -122,7 +122,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join_filters.q.out ql/src/test/results/clientpositive/spark/auto_join_filters.q.out index 4af239b..09c167a 100644 --- ql/src/test/results/clientpositive/spark/auto_join_filters.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_filters.q.out @@ -54,6 +54,109 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n5 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
alias: a + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} + 1 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 441 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 441 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value 
PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 @@ -198,6 +301,138 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n5 #### A masked pattern was here #### 4939870 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(KEY.reducesinkkey0 > 40)} {(VALUE._col0 > 50)} {(KEY.reducesinkkey0 = VALUE._col0)} + 1 {(VALUE._col0 > 40)} {(KEY.reducesinkkey0 > 50)} {(VALUE._col0 = KEY.reducesinkkey0)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 @@ -340,6 +575,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: 
default@myinput1_n5 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 @@ -484,6 +729,138 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n5 #### A masked pattern was here #### 4939870 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(KEY.reducesinkkey0 > 40)} {(VALUE._col0 > 50)} {(KEY.reducesinkkey0 = VALUE._col0)} + 1 {(VALUE._col0 > 40)} {(KEY.reducesinkkey0 > 50)} {(VALUE._col0 = KEY.reducesinkkey0)} + keys: + 0 _col0 (type: int) + 1 
_col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 
AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n5 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 diff --git ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out index 5f089d7..a1f669b 100644 --- ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out @@ -188,6 +188,135 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n2 #### A masked pattern was here #### 4543526 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value and a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value and a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1_n2 +#### A masked pattern was here #### +4543526 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n2 a LEFT OUTER JOIN myinput1_n2 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1_n2 c ON (b.value=c.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n2 diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 18e75aa..87f0ca8 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out 
@@ -289,74 +289,12 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 0 val_0 val_0 0 val_0 val_0 0 val_0 val_0 -2 val_2 val_2 -4 val_4 val_4 -8 val_8 val_8 -11 val_11 val_11 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -19 val_19 val_19 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -28 val_28 val_28 -33 val_33 val_33 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -44 val_44 val_44 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -53 val_53 val_53 -57 val_57 val_57 -64 val_64 val_64 -66 val_66 val_66 -77 val_77 val_77 -80 val_80 val_80 -82 val_82 val_82 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -86 val_86 val_86 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 103 val_103 val_103 103 val_103 val_103 103 val_103 val_103 103 val_103 val_103 105 val_105 val_105 +11 val_11 val_11 114 val_114 val_114 116 val_116 val_116 118 val_118 val_118 @@ -398,6 +336,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 149 val_149 val_149 149 val_149 val_149 149 val_149 val_149 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 150 val_150 val_150 152 val_152 val_152 152 val_152 val_152 @@ -435,6 +377,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 169 val_169 val_169 169 val_169 val_169 169 val_169 val_169 +17 val_17 val_17 170 val_170 val_170 172 val_172 val_172 172 val_172 val_172 @@ -461,10 +404,13 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 187 val_187 val_187 187 val_187 val_187 189 val_189 val_189 +19 val_19 val_19 190 val_190 val_190 192 val_192 val_192 194 val_194 val_194 196 val_196 val_196 +2 val_2 val_2 +20 val_20 val_20 200 val_200 val_200 200 val_200 val_200 200 val_200 val_200 @@ -511,6 +457,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 239 val_239 val_239 239 val_239 val_239 239 val_239 val_239 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 242 val_242 val_242 242 val_242 val_242 242 val_242 val_242 @@ -522,6 +472,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 255 val_255 val_255 255 val_255 val_255 257 val_257 val_257 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 260 val_260 val_260 262 val_262 val_262 266 val_266 val_266 @@ -551,6 +505,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 277 val_277 val_277 277 val_277 val_277 277 val_277 val_277 +28 val_28 val_28 280 val_280 val_280 280 val_280 val_280 280 val_280 val_280 @@ -612,11 +567,21 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 327 val_327 val_327 327 val_327 val_327 327 val_327 val_327 +33 val_33 val_33 332 val_332 val_332 336 val_336 val_336 338 val_338 val_338 341 val_341 val_341 345 val_345 val_345 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 356 val_356 val_356 365 val_365 val_365 367 val_367 val_367 @@ -632,6 +597,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 369 val_369 val_369 369 val_369 val_369 369 val_369 
val_369 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 374 val_374 val_374 378 val_378 val_378 389 val_389 val_389 @@ -646,6 +615,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 396 val_396 val_396 396 val_396 val_396 396 val_396 val_396 +4 val_4 val_4 400 val_400 val_400 402 val_402 val_402 404 val_404 val_404 @@ -683,6 +653,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 417 val_417 val_417 417 val_417 val_417 419 val_419 val_419 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 424 val_424 val_424 424 val_424 val_424 424 val_424 val_424 @@ -702,6 +676,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 439 val_439 val_439 439 val_439 val_439 439 val_439 val_439 +44 val_44 val_44 444 val_444 val_444 446 val_446 val_446 448 val_448 val_448 @@ -760,6 +735,31 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 493 val_493 val_493 495 val_495 val_495 497 val_497 val_497 +51 val_51 val_51 +51 val_51 val_51 +51 val_51 val_51 +51 val_51 val_51 +53 val_53 val_53 +57 val_57 val_57 +64 val_64 val_64 +66 val_66 val_66 +77 val_77 val_77 +8 val_8 val_8 +80 val_80 val_80 +82 val_82 val_82 +84 val_84 val_84 +84 val_84 val_84 +84 val_84 val_84 +84 val_84 val_84 +86 val_86 val_86 +95 val_95 val_95 +95 val_95 val_95 +95 val_95 val_95 +95 val_95 val_95 +97 val_97 val_97 +97 val_97 val_97 +97 val_97 val_97 +97 val_97 val_97 PREHOOK: query: explain select a.key, a.value, b.value from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value @@ -885,74 +885,12 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 0 val_0 val_0 0 val_0 val_0 0 val_0 val_0 -2 val_2 val_2 -4 val_4 val_4 -8 val_8 val_8 -11 val_11 val_11 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -19 val_19 val_19 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -28 val_28 val_28 -33 val_33 val_33 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -44 val_44 val_44 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -53 val_53 val_53 -57 val_57 val_57 -64 val_64 val_64 -66 val_66 val_66 -77 val_77 val_77 -80 val_80 val_80 -82 val_82 val_82 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -86 val_86 val_86 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 103 val_103 val_103 103 val_103 val_103 103 val_103 val_103 103 val_103 val_103 105 val_105 val_105 +11 val_11 val_11 114 val_114 val_114 116 val_116 val_116 118 val_118 val_118 @@ -994,6 +932,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 149 val_149 val_149 149 val_149 val_149 149 val_149 val_149 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 150 val_150 val_150 152 val_152 val_152 152 val_152 val_152 @@ -1031,6 +973,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 169 val_169 val_169 169 val_169 val_169 169 val_169 val_169 +17 val_17 val_17 170 val_170 val_170 172 val_172 val_172 172 val_172 val_172 @@ -1057,10 +1000,13 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 187 val_187 val_187 187 val_187 val_187 189 val_189 
val_189 +19 val_19 val_19 190 val_190 val_190 192 val_192 val_192 194 val_194 val_194 196 val_196 val_196 +2 val_2 val_2 +20 val_20 val_20 200 val_200 val_200 200 val_200 val_200 200 val_200 val_200 @@ -1107,6 +1053,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 239 val_239 val_239 239 val_239 val_239 239 val_239 val_239 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 242 val_242 val_242 242 val_242 val_242 242 val_242 val_242 @@ -1118,6 +1068,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 255 val_255 val_255 255 val_255 val_255 257 val_257 val_257 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 260 val_260 val_260 262 val_262 val_262 266 val_266 val_266 @@ -1147,6 +1101,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 277 val_277 val_277 277 val_277 val_277 277 val_277 val_277 +28 val_28 val_28 280 val_280 val_280 280 val_280 val_280 280 val_280 val_280 @@ -1208,11 +1163,21 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 327 val_327 val_327 327 val_327 val_327 327 val_327 val_327 +33 val_33 val_33 332 val_332 val_332 336 val_336 val_336 338 val_338 val_338 341 val_341 val_341 345 val_345 val_345 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 356 val_356 val_356 365 val_365 val_365 367 val_367 val_367 @@ -1228,6 +1193,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 369 val_369 val_369 369 val_369 val_369 369 val_369 val_369 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 374 val_374 val_374 378 val_378 val_378 389 val_389 val_389 @@ -1242,6 +1211,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 396 val_396 val_396 396 val_396 val_396 396 val_396 val_396 +4 val_4 val_4 400 val_400 val_400 402 val_402 val_402 404 val_404 val_404 @@ -1279,6 +1249,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 417 val_417 val_417 417 val_417 val_417 419 val_419 val_419 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 +42 val_42 val_42 424 val_424 val_424 424 val_424 val_424 424 val_424 val_424 @@ -1298,6 +1272,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 439 val_439 val_439 439 val_439 val_439 439 val_439 val_439 +44 val_44 val_44 444 val_444 val_444 446 val_446 val_446 448 val_448 val_448 @@ -1356,6 +1331,31 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 493 val_493 val_493 495 val_495 val_495 497 val_497 val_497 +51 val_51 val_51 +51 val_51 val_51 +51 val_51 val_51 +51 val_51 val_51 +53 val_53 val_53 +57 val_57 val_57 +64 val_64 val_64 +66 val_66 val_66 +77 val_77 val_77 +8 val_8 val_8 +80 val_80 val_80 +82 val_82 val_82 +84 val_84 val_84 +84 val_84 val_84 +84 val_84 val_84 +84 val_84 val_84 +86 val_86 val_86 +95 val_95 val_95 +95 val_95 val_95 +95 val_95 val_95 +95 val_95 val_95 +97 val_97 val_97 +97 val_97 val_97 +97 val_97 val_97 +97 val_97 val_97 PREHOOK: query: explain select count(*) from @@ -4848,329 +4848,432 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 0 val_0 val_0 0 val_0 val_0 0 val_0 val_0 -2 val_2 val_2 -4 val_4 val_4 -8 val_8 val_8 +0 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_118 val_118 +0 val_118 val_118 +0 val_118 val_118 +0 val_118 val_118 +0 val_125 val_125 +0 val_125 val_125 +0 val_125 val_125 +0 val_125 val_125 +0 val_129 val_129 +0 val_129 val_129 +0 val_129 val_129 +0 val_129 val_129 +0 val_134 val_134 +0 val_134 val_134 +0 val_134 val_134 +0 val_134 val_134 +0 val_138 
val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_149 val_149 +0 val_149 val_149 +0 val_149 val_149 +0 val_149 val_149 +0 val_15 val_15 +0 val_15 val_15 +0 val_15 val_15 +0 val_15 val_15 +0 val_152 val_152 +0 val_152 val_152 +0 val_152 val_152 +0 val_152 val_152 +0 val_165 val_165 +0 val_165 val_165 +0 val_165 val_165 +0 val_165 val_165 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_172 val_172 +0 val_172 val_172 +0 val_172 val_172 +0 val_172 val_172 +0 val_174 val_174 +0 val_174 val_174 +0 val_174 val_174 +0 val_174 val_174 +0 val_176 val_176 +0 val_176 val_176 +0 val_176 val_176 +0 val_176 val_176 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_200 val_200 +0 val_200 val_200 +0 val_200 val_200 +0 val_200 val_200 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_213 val_213 +0 val_213 val_213 +0 val_213 val_213 +0 val_213 val_213 +0 val_217 val_217 +0 val_217 val_217 +0 val_217 val_217 +0 val_217 val_217 +0 val_219 val_219 +0 val_219 val_219 +0 val_219 val_219 +0 val_219 val_219 +0 val_224 val_224 +0 val_224 val_224 +0 val_224 val_224 +0 val_224 val_224 +0 val_233 val_233 +0 val_233 val_233 +0 val_233 val_233 +0 val_233 val_233 +0 val_237 val_237 +0 val_237 val_237 +0 val_237 val_237 +0 val_237 val_237 +0 val_239 val_239 +0 val_239 val_239 +0 val_239 val_239 +0 val_239 val_239 +0 val_24 val_24 +0 val_24 val_24 +0 val_24 val_24 +0 val_24 val_24 +0 val_242 val_242 +0 val_242 val_242 +0 val_242 val_242 +0 val_242 val_242 +0 val_255 val_255 +0 val_255 val_255 +0 val_255 val_255 +0 val_255 val_255 +0 val_26 val_26 +0 val_26 val_26 +0 val_26 val_26 +0 val_26 val_26 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_280 val_280 +0 val_280 val_280 +0 val_280 val_280 +0 val_280 val_280 +0 val_282 val_282 +0 val_282 val_282 +0 val_282 val_282 +0 val_282 val_282 +0 val_288 val_288 +0 val_288 val_288 +0 val_288 val_288 +0 val_288 val_288 +0 val_307 val_307 +0 val_307 val_307 +0 val_307 val_307 +0 val_307 val_307 +0 val_309 val_309 +0 val_309 val_309 +0 val_309 val_309 +0 val_309 val_309 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 
val_316 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_321 val_321 +0 val_321 val_321 +0 val_321 val_321 +0 val_321 val_321 +0 val_325 val_325 +0 val_325 val_325 +0 val_325 val_325 +0 val_325 val_325 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_367 val_367 +0 val_367 val_367 +0 val_367 val_367 +0 val_367 val_367 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_37 val_37 +0 val_37 val_37 +0 val_37 val_37 +0 val_37 val_37 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_404 val_404 +0 val_404 val_404 +0 val_404 val_404 +0 val_404 val_404 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_413 val_413 +0 val_413 val_413 +0 val_413 val_413 +0 val_413 val_413 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_42 val_42 +0 val_42 val_42 +0 val_42 val_42 +0 val_42 val_42 +0 val_424 val_424 +0 val_424 val_424 +0 val_424 val_424 +0 val_424 val_424 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_439 val_439 +0 val_439 val_439 +0 val_439 val_439 +0 val_439 val_439 +0 val_459 val_459 +0 val_459 val_459 +0 val_459 val_459 +0 val_459 val_459 +0 val_462 val_462 +0 val_462 val_462 +0 val_462 val_462 +0 val_462 val_462 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_51 val_51 +0 val_51 val_51 +0 val_51 val_51 +0 val_51 val_51 +0 val_84 val_84 +0 val_84 val_84 +0 val_84 val_84 +0 val_84 val_84 +0 val_95 val_95 +0 val_95 val_95 +0 val_95 val_95 +0 val_95 val_95 +0 val_97 val_97 +0 val_97 val_97 +0 val_97 val_97 +0 val_97 val_97 +105 val_105 val_105 11 val_11 val_11 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -19 val_19 val_19 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -28 val_28 val_28 
-33 val_33 val_33 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -44 val_44 val_44 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -53 val_53 val_53 -57 val_57 val_57 -64 val_64 val_64 -66 val_66 val_66 -77 val_77 val_77 -80 val_80 val_80 -82 val_82 val_82 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -86 val_86 val_86 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -103 val_103 val_103 -103 val_103 val_103 -103 val_103 val_103 -103 val_103 val_103 -105 val_105 val_105 114 val_114 val_114 116 val_116 val_116 -118 val_118 val_118 -118 val_118 val_118 -118 val_118 val_118 -118 val_118 val_118 -125 val_125 val_125 -125 val_125 val_125 -125 val_125 val_125 -125 val_125 val_125 -129 val_129 val_129 -129 val_129 val_129 -129 val_129 val_129 -129 val_129 val_129 -134 val_134 val_134 -134 val_134 val_134 -134 val_134 val_134 -134 val_134 val_134 136 val_136 val_136 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 143 val_143 val_143 145 val_145 val_145 -149 val_149 val_149 -149 val_149 val_149 -149 val_149 val_149 -149 val_149 val_149 150 val_150 val_150 -152 val_152 val_152 -152 val_152 val_152 -152 val_152 val_152 -152 val_152 val_152 156 val_156 val_156 158 val_158 val_158 163 val_163 val_163 -165 val_165 val_165 -165 val_165 val_165 -165 val_165 val_165 -165 val_165 val_165 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 +17 val_17 val_17 170 val_170 val_170 -172 val_172 val_172 -172 val_172 val_172 -172 val_172 val_172 -172 val_172 val_172 -174 val_174 val_174 -174 val_174 val_174 -174 val_174 val_174 -174 val_174 val_174 -176 val_176 val_176 -176 val_176 val_176 -176 val_176 val_176 -176 val_176 val_176 178 val_178 val_178 181 val_181 val_181 183 val_183 val_183 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 189 val_189 val_189 +19 val_19 val_19 190 val_190 val_190 192 val_192 val_192 194 val_194 val_194 196 val_196 val_196 -200 val_200 val_200 -200 val_200 val_200 -200 val_200 val_200 -200 val_200 val_200 +2 val_2 val_2 +20 val_20 val_20 202 val_202 val_202 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 
-213 val_213 val_213 -217 val_217 val_217 -217 val_217 val_217 -217 val_217 val_217 -217 val_217 val_217 -219 val_219 val_219 -219 val_219 val_219 -219 val_219 val_219 -219 val_219 val_219 222 val_222 val_222 -224 val_224 val_224 -224 val_224 val_224 -224 val_224 val_224 -224 val_224 val_224 226 val_226 val_226 228 val_228 val_228 -233 val_233 val_233 -233 val_233 val_233 -233 val_233 val_233 -233 val_233 val_233 235 val_235 val_235 -237 val_237 val_237 -237 val_237 val_237 -237 val_237 val_237 -237 val_237 val_237 -239 val_239 val_239 -239 val_239 val_239 -239 val_239 val_239 -239 val_239 val_239 -242 val_242 val_242 -242 val_242 val_242 -242 val_242 val_242 -242 val_242 val_242 244 val_244 val_244 248 val_248 val_248 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 257 val_257 val_257 260 val_260 val_260 262 val_262 val_262 266 val_266 val_266 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 275 val_275 val_275 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -280 val_280 val_280 -280 val_280 val_280 -280 val_280 val_280 -280 val_280 val_280 -282 val_282 val_282 -282 val_282 val_282 -282 val_282 val_282 -282 val_282 val_282 +28 val_28 val_28 284 val_284 val_284 286 val_286 val_286 -288 val_288 val_288 -288 val_288 val_288 -288 val_288 val_288 -288 val_288 val_288 291 val_291 val_291 305 val_305 val_305 -307 val_307 val_307 -307 val_307 val_307 -307 val_307 val_307 -307 val_307 val_307 -309 val_309 val_309 -309 val_309 val_309 -309 val_309 val_309 -309 val_309 val_309 310 val_310 val_310 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -321 val_321 val_321 -321 val_321 val_321 -321 val_321 val_321 -321 val_321 val_321 323 val_323 val_323 -325 val_325 val_325 -325 val_325 val_325 -325 val_325 val_325 -325 val_325 val_325 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 +33 val_33 val_33 332 val_332 val_332 336 val_336 val_336 338 val_338 val_338 @@ -5178,147 +5281,44 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 345 val_345 val_345 356 val_356 val_356 365 val_365 val_365 -367 val_367 val_367 -367 val_367 val_367 -367 val_367 val_367 -367 val_367 val_367 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 374 val_374 val_374 378 val_378 val_378 389 val_389 val_389 392 val_392 val_392 394 val_394 val_394 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 +4 val_4 val_4 400 val_400 val_400 402 val_402 
val_402 -404 val_404 val_404 -404 val_404 val_404 -404 val_404 val_404 -404 val_404 val_404 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 411 val_411 val_411 -413 val_413 val_413 -413 val_413 val_413 -413 val_413 val_413 -413 val_413 val_413 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 419 val_419 val_419 -424 val_424 val_424 -424 val_424 val_424 -424 val_424 val_424 -424 val_424 val_424 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 435 val_435 val_435 437 val_437 val_437 -439 val_439 val_439 -439 val_439 val_439 -439 val_439 val_439 -439 val_439 val_439 +44 val_44 val_44 444 val_444 val_444 446 val_446 val_446 448 val_448 val_448 453 val_453 val_453 455 val_455 val_455 457 val_457 val_457 -459 val_459 val_459 -459 val_459 val_459 -459 val_459 val_459 -459 val_459 val_459 460 val_460 val_460 -462 val_462 val_462 -462 val_462 val_462 -462 val_462 val_462 -462 val_462 val_462 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 475 val_475 val_475 477 val_477 val_477 479 val_479 val_479 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 482 val_482 val_482 484 val_484 val_484 491 val_491 val_491 493 val_493 val_493 495 val_495 val_495 497 val_497 val_497 +53 val_53 val_53 +57 val_57 val_57 +64 val_64 val_64 +66 val_66 val_66 +77 val_77 val_77 +8 val_8 val_8 +80 val_80 val_80 +82 val_82 val_82 +86 val_86 val_86 PREHOOK: query: explain select a.key, a.value, b.value from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value PREHOOK: type: QUERY @@ -5431,329 +5431,432 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 0 val_0 val_0 0 val_0 val_0 0 val_0 val_0 -2 val_2 val_2 -4 val_4 val_4 -8 val_8 val_8 -11 val_11 val_11 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -19 val_19 val_19 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -28 val_28 val_28 -33 val_33 val_33 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -44 val_44 val_44 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -53 
val_53 val_53 -57 val_57 val_57 -64 val_64 val_64 -66 val_66 val_66 -77 val_77 val_77 -80 val_80 val_80 -82 val_82 val_82 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -86 val_86 val_86 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -103 val_103 val_103 -103 val_103 val_103 -103 val_103 val_103 -103 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_118 val_118 +0 val_118 val_118 +0 val_118 val_118 +0 val_118 val_118 +0 val_125 val_125 +0 val_125 val_125 +0 val_125 val_125 +0 val_125 val_125 +0 val_129 val_129 +0 val_129 val_129 +0 val_129 val_129 +0 val_129 val_129 +0 val_134 val_134 +0 val_134 val_134 +0 val_134 val_134 +0 val_134 val_134 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_149 val_149 +0 val_149 val_149 +0 val_149 val_149 +0 val_149 val_149 +0 val_15 val_15 +0 val_15 val_15 +0 val_15 val_15 +0 val_15 val_15 +0 val_152 val_152 +0 val_152 val_152 +0 val_152 val_152 +0 val_152 val_152 +0 val_165 val_165 +0 val_165 val_165 +0 val_165 val_165 +0 val_165 val_165 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_172 val_172 +0 val_172 val_172 +0 val_172 val_172 +0 val_172 val_172 +0 val_174 val_174 +0 val_174 val_174 +0 val_174 val_174 +0 val_174 val_174 +0 val_176 val_176 +0 val_176 val_176 +0 val_176 val_176 +0 val_176 val_176 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_200 val_200 +0 val_200 val_200 +0 val_200 val_200 +0 val_200 val_200 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_213 val_213 +0 val_213 val_213 +0 val_213 val_213 +0 val_213 val_213 +0 val_217 val_217 +0 val_217 val_217 +0 val_217 val_217 +0 val_217 val_217 +0 val_219 val_219 +0 val_219 val_219 +0 val_219 val_219 +0 val_219 val_219 +0 val_224 val_224 +0 val_224 val_224 +0 val_224 val_224 +0 val_224 val_224 +0 val_233 val_233 +0 val_233 val_233 +0 val_233 val_233 +0 val_233 val_233 +0 val_237 val_237 +0 val_237 val_237 +0 val_237 val_237 +0 val_237 val_237 +0 val_239 val_239 +0 val_239 val_239 +0 val_239 val_239 +0 val_239 val_239 +0 val_24 val_24 +0 val_24 val_24 +0 val_24 val_24 +0 val_24 val_24 +0 val_242 val_242 +0 val_242 val_242 +0 val_242 val_242 +0 val_242 val_242 +0 val_255 val_255 +0 val_255 val_255 +0 val_255 val_255 +0 val_255 val_255 +0 val_26 val_26 +0 val_26 val_26 +0 val_26 val_26 +0 val_26 val_26 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_277 val_277 +0 
val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_280 val_280 +0 val_280 val_280 +0 val_280 val_280 +0 val_280 val_280 +0 val_282 val_282 +0 val_282 val_282 +0 val_282 val_282 +0 val_282 val_282 +0 val_288 val_288 +0 val_288 val_288 +0 val_288 val_288 +0 val_288 val_288 +0 val_307 val_307 +0 val_307 val_307 +0 val_307 val_307 +0 val_307 val_307 +0 val_309 val_309 +0 val_309 val_309 +0 val_309 val_309 +0 val_309 val_309 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_321 val_321 +0 val_321 val_321 +0 val_321 val_321 +0 val_321 val_321 +0 val_325 val_325 +0 val_325 val_325 +0 val_325 val_325 +0 val_325 val_325 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_367 val_367 +0 val_367 val_367 +0 val_367 val_367 +0 val_367 val_367 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_37 val_37 +0 val_37 val_37 +0 val_37 val_37 +0 val_37 val_37 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_404 val_404 +0 val_404 val_404 +0 val_404 val_404 +0 val_404 val_404 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_413 val_413 +0 val_413 val_413 +0 val_413 val_413 +0 val_413 val_413 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_42 val_42 +0 val_42 val_42 +0 val_42 val_42 +0 val_42 val_42 +0 val_424 val_424 +0 val_424 val_424 +0 val_424 val_424 +0 val_424 val_424 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_439 val_439 +0 val_439 val_439 +0 val_439 val_439 +0 val_439 val_439 +0 val_459 val_459 +0 val_459 val_459 +0 val_459 val_459 +0 val_459 val_459 +0 val_462 val_462 +0 val_462 val_462 +0 val_462 val_462 +0 val_462 val_462 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 
+0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_51 val_51 +0 val_51 val_51 +0 val_51 val_51 +0 val_51 val_51 +0 val_84 val_84 +0 val_84 val_84 +0 val_84 val_84 +0 val_84 val_84 +0 val_95 val_95 +0 val_95 val_95 +0 val_95 val_95 +0 val_95 val_95 +0 val_97 val_97 +0 val_97 val_97 +0 val_97 val_97 +0 val_97 val_97 105 val_105 val_105 +11 val_11 val_11 114 val_114 val_114 116 val_116 val_116 -118 val_118 val_118 -118 val_118 val_118 -118 val_118 val_118 -118 val_118 val_118 -125 val_125 val_125 -125 val_125 val_125 -125 val_125 val_125 -125 val_125 val_125 -129 val_129 val_129 -129 val_129 val_129 -129 val_129 val_129 -129 val_129 val_129 -134 val_134 val_134 -134 val_134 val_134 -134 val_134 val_134 -134 val_134 val_134 136 val_136 val_136 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 143 val_143 val_143 145 val_145 val_145 -149 val_149 val_149 -149 val_149 val_149 -149 val_149 val_149 -149 val_149 val_149 150 val_150 val_150 -152 val_152 val_152 -152 val_152 val_152 -152 val_152 val_152 -152 val_152 val_152 156 val_156 val_156 158 val_158 val_158 163 val_163 val_163 -165 val_165 val_165 -165 val_165 val_165 -165 val_165 val_165 -165 val_165 val_165 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 +17 val_17 val_17 170 val_170 val_170 -172 val_172 val_172 -172 val_172 val_172 -172 val_172 val_172 -172 val_172 val_172 -174 val_174 val_174 -174 val_174 val_174 -174 val_174 val_174 -174 val_174 val_174 -176 val_176 val_176 -176 val_176 val_176 -176 val_176 val_176 -176 val_176 val_176 178 val_178 val_178 181 val_181 val_181 183 val_183 val_183 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 189 val_189 val_189 +19 val_19 val_19 190 val_190 val_190 192 val_192 val_192 194 val_194 val_194 196 val_196 val_196 -200 val_200 val_200 -200 val_200 val_200 -200 val_200 val_200 -200 val_200 val_200 +2 val_2 val_2 +20 val_20 val_20 202 val_202 val_202 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -217 val_217 val_217 -217 val_217 val_217 -217 val_217 val_217 -217 val_217 val_217 -219 val_219 val_219 -219 val_219 val_219 -219 val_219 val_219 -219 val_219 val_219 222 val_222 val_222 -224 val_224 val_224 -224 val_224 val_224 -224 val_224 val_224 -224 val_224 val_224 226 val_226 val_226 228 val_228 val_228 -233 
val_233 val_233 -233 val_233 val_233 -233 val_233 val_233 -233 val_233 val_233 235 val_235 val_235 -237 val_237 val_237 -237 val_237 val_237 -237 val_237 val_237 -237 val_237 val_237 -239 val_239 val_239 -239 val_239 val_239 -239 val_239 val_239 -239 val_239 val_239 -242 val_242 val_242 -242 val_242 val_242 -242 val_242 val_242 -242 val_242 val_242 244 val_244 val_244 248 val_248 val_248 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 257 val_257 val_257 260 val_260 val_260 262 val_262 val_262 266 val_266 val_266 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 275 val_275 val_275 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -280 val_280 val_280 -280 val_280 val_280 -280 val_280 val_280 -280 val_280 val_280 -282 val_282 val_282 -282 val_282 val_282 -282 val_282 val_282 -282 val_282 val_282 -284 val_284 val_284 -286 val_286 val_286 -288 val_288 val_288 -288 val_288 val_288 -288 val_288 val_288 -288 val_288 val_288 -291 val_291 val_291 -305 val_305 val_305 -307 val_307 val_307 -307 val_307 val_307 -307 val_307 val_307 -307 val_307 val_307 -309 val_309 val_309 -309 val_309 val_309 -309 val_309 val_309 -309 val_309 val_309 -310 val_310 val_310 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -321 val_321 val_321 -321 val_321 val_321 -321 val_321 val_321 -321 val_321 val_321 +28 val_28 val_28 +284 val_284 val_284 +286 val_286 val_286 +291 val_291 val_291 +305 val_305 val_305 +310 val_310 val_310 323 val_323 val_323 -325 val_325 val_325 -325 val_325 val_325 -325 val_325 val_325 -325 val_325 val_325 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 +33 val_33 val_33 332 val_332 val_332 336 val_336 val_336 338 val_338 val_338 @@ -5761,147 +5864,44 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 345 val_345 val_345 356 val_356 val_356 365 val_365 val_365 -367 val_367 val_367 -367 val_367 val_367 -367 val_367 val_367 -367 val_367 val_367 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 374 val_374 val_374 378 val_378 val_378 389 val_389 val_389 392 val_392 val_392 394 val_394 val_394 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 +4 val_4 val_4 400 val_400 val_400 402 val_402 val_402 -404 val_404 val_404 -404 val_404 val_404 -404 val_404 val_404 -404 val_404 val_404 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 
val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 411 val_411 val_411 -413 val_413 val_413 -413 val_413 val_413 -413 val_413 val_413 -413 val_413 val_413 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 419 val_419 val_419 -424 val_424 val_424 -424 val_424 val_424 -424 val_424 val_424 -424 val_424 val_424 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 435 val_435 val_435 437 val_437 val_437 -439 val_439 val_439 -439 val_439 val_439 -439 val_439 val_439 -439 val_439 val_439 +44 val_44 val_44 444 val_444 val_444 446 val_446 val_446 448 val_448 val_448 453 val_453 val_453 455 val_455 val_455 457 val_457 val_457 -459 val_459 val_459 -459 val_459 val_459 -459 val_459 val_459 -459 val_459 val_459 460 val_460 val_460 -462 val_462 val_462 -462 val_462 val_462 -462 val_462 val_462 -462 val_462 val_462 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 475 val_475 val_475 477 val_477 val_477 479 val_479 val_479 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 482 val_482 val_482 484 val_484 val_484 491 val_491 val_491 493 val_493 val_493 495 val_495 val_495 497 val_497 val_497 +53 val_53 val_53 +57 val_57 val_57 +64 val_64 val_64 +66 val_66 val_66 +77 val_77 val_77 +8 val_8 val_8 +80 val_80 val_80 +82 val_82 val_82 +86 val_86 val_86 PREHOOK: query: CREATE TABLE tab2_n4(key int, value string) PARTITIONED BY(ds STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -6041,338 +6041,441 @@ POSTHOOK: Input: default@tab2_n4@ds=2008-04-08 POSTHOOK: Input: default@tab_part_n9 POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 #### A masked pattern was here #### -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -0 val_0 val_0 -2 val_2 val_2 -4 val_4 val_4 -8 val_8 val_8 -11 val_11 val_11 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -19 val_19 val_19 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -28 val_28 val_28 -33 val_33 val_33 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -44 val_44 val_44 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 
val_51 -53 val_53 val_53 -57 val_57 val_57 -64 val_64 val_64 -66 val_66 val_66 -77 val_77 val_77 -80 val_80 val_80 -82 val_82 val_82 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -86 val_86 val_86 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -103 val_103 val_103 -103 val_103 val_103 -103 val_103 val_103 -103 val_103 val_103 -105 val_105 val_105 -114 val_114 val_114 -116 val_116 val_116 -118 val_118 val_118 -118 val_118 val_118 -118 val_118 val_118 -118 val_118 val_118 -125 val_125 val_125 -125 val_125 val_125 -125 val_125 val_125 -125 val_125 val_125 -129 val_129 val_129 -129 val_129 val_129 -129 val_129 val_129 -129 val_129 val_129 -134 val_134 val_134 -134 val_134 val_134 -134 val_134 val_134 -134 val_134 val_134 -136 val_136 val_136 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_118 val_118 +0 val_118 val_118 +0 val_118 val_118 +0 val_118 val_118 +0 val_125 val_125 +0 val_125 val_125 +0 val_125 val_125 +0 val_125 val_125 +0 val_129 val_129 +0 val_129 val_129 +0 val_129 val_129 +0 val_129 val_129 +0 val_134 val_134 +0 val_134 val_134 +0 val_134 val_134 +0 val_134 val_134 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_149 val_149 +0 val_149 val_149 +0 val_149 val_149 +0 val_149 val_149 +0 val_15 val_15 +0 val_15 val_15 +0 val_15 val_15 +0 val_15 val_15 +0 val_152 val_152 +0 val_152 val_152 +0 val_152 val_152 +0 val_152 val_152 +0 val_165 val_165 +0 val_165 val_165 +0 val_165 val_165 +0 val_165 val_165 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_172 val_172 +0 val_172 val_172 +0 val_172 val_172 +0 val_172 val_172 +0 val_174 val_174 +0 val_174 val_174 +0 val_174 val_174 +0 val_174 val_174 +0 val_176 val_176 +0 val_176 val_176 +0 val_176 val_176 +0 val_176 val_176 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_200 val_200 +0 val_200 val_200 +0 val_200 val_200 +0 val_200 val_200 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_213 val_213 +0 val_213 val_213 +0 val_213 val_213 +0 val_213 val_213 +0 val_217 val_217 +0 val_217 
val_217 +0 val_217 val_217 +0 val_217 val_217 +0 val_219 val_219 +0 val_219 val_219 +0 val_219 val_219 +0 val_219 val_219 +0 val_224 val_224 +0 val_224 val_224 +0 val_224 val_224 +0 val_224 val_224 +0 val_233 val_233 +0 val_233 val_233 +0 val_233 val_233 +0 val_233 val_233 +0 val_237 val_237 +0 val_237 val_237 +0 val_237 val_237 +0 val_237 val_237 +0 val_239 val_239 +0 val_239 val_239 +0 val_239 val_239 +0 val_239 val_239 +0 val_24 val_24 +0 val_24 val_24 +0 val_24 val_24 +0 val_24 val_24 +0 val_242 val_242 +0 val_242 val_242 +0 val_242 val_242 +0 val_242 val_242 +0 val_255 val_255 +0 val_255 val_255 +0 val_255 val_255 +0 val_255 val_255 +0 val_26 val_26 +0 val_26 val_26 +0 val_26 val_26 +0 val_26 val_26 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_280 val_280 +0 val_280 val_280 +0 val_280 val_280 +0 val_280 val_280 +0 val_282 val_282 +0 val_282 val_282 +0 val_282 val_282 +0 val_282 val_282 +0 val_288 val_288 +0 val_288 val_288 +0 val_288 val_288 +0 val_288 val_288 +0 val_307 val_307 +0 val_307 val_307 +0 val_307 val_307 +0 val_307 val_307 +0 val_309 val_309 +0 val_309 val_309 +0 val_309 val_309 +0 val_309 val_309 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_321 val_321 +0 val_321 val_321 +0 val_321 val_321 +0 val_321 val_321 +0 val_325 val_325 +0 val_325 val_325 +0 val_325 val_325 +0 val_325 val_325 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_367 val_367 +0 val_367 val_367 +0 val_367 val_367 +0 val_367 val_367 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_37 val_37 +0 val_37 val_37 +0 val_37 val_37 +0 val_37 val_37 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_404 val_404 +0 val_404 val_404 +0 val_404 val_404 +0 val_404 val_404 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_413 val_413 +0 val_413 val_413 +0 val_413 val_413 +0 val_413 val_413 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_42 val_42 +0 val_42 val_42 +0 val_42 val_42 +0 
val_42 val_42 +0 val_424 val_424 +0 val_424 val_424 +0 val_424 val_424 +0 val_424 val_424 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_439 val_439 +0 val_439 val_439 +0 val_439 val_439 +0 val_439 val_439 +0 val_459 val_459 +0 val_459 val_459 +0 val_459 val_459 +0 val_459 val_459 +0 val_462 val_462 +0 val_462 val_462 +0 val_462 val_462 +0 val_462 val_462 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_51 val_51 +0 val_51 val_51 +0 val_51 val_51 +0 val_51 val_51 +0 val_84 val_84 +0 val_84 val_84 +0 val_84 val_84 +0 val_84 val_84 +0 val_95 val_95 +0 val_95 val_95 +0 val_95 val_95 +0 val_95 val_95 +0 val_97 val_97 +0 val_97 val_97 +0 val_97 val_97 +0 val_97 val_97 +105 val_105 val_105 +11 val_11 val_11 +114 val_114 val_114 +116 val_116 val_116 +136 val_136 val_136 143 val_143 val_143 145 val_145 val_145 -149 val_149 val_149 -149 val_149 val_149 -149 val_149 val_149 -149 val_149 val_149 150 val_150 val_150 -152 val_152 val_152 -152 val_152 val_152 -152 val_152 val_152 -152 val_152 val_152 156 val_156 val_156 158 val_158 val_158 163 val_163 val_163 -165 val_165 val_165 -165 val_165 val_165 -165 val_165 val_165 -165 val_165 val_165 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 +17 val_17 val_17 170 val_170 val_170 -172 val_172 val_172 -172 val_172 val_172 -172 val_172 val_172 -172 val_172 val_172 -174 val_174 val_174 -174 val_174 val_174 -174 val_174 val_174 -174 val_174 val_174 -176 val_176 val_176 -176 val_176 val_176 -176 val_176 val_176 -176 val_176 val_176 178 val_178 val_178 181 val_181 val_181 183 val_183 val_183 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 189 val_189 val_189 +19 val_19 val_19 190 val_190 val_190 192 val_192 val_192 194 val_194 val_194 196 val_196 val_196 -200 val_200 val_200 -200 val_200 val_200 -200 val_200 val_200 -200 val_200 val_200 +2 val_2 val_2 +20 val_20 val_20 202 val_202 val_202 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -217 val_217 val_217 -217 val_217 val_217 -217 val_217 val_217 -217 val_217 
val_217 -219 val_219 val_219 -219 val_219 val_219 -219 val_219 val_219 -219 val_219 val_219 222 val_222 val_222 -224 val_224 val_224 -224 val_224 val_224 -224 val_224 val_224 -224 val_224 val_224 226 val_226 val_226 228 val_228 val_228 -233 val_233 val_233 -233 val_233 val_233 -233 val_233 val_233 -233 val_233 val_233 235 val_235 val_235 -237 val_237 val_237 -237 val_237 val_237 -237 val_237 val_237 -237 val_237 val_237 -239 val_239 val_239 -239 val_239 val_239 -239 val_239 val_239 -239 val_239 val_239 -242 val_242 val_242 -242 val_242 val_242 -242 val_242 val_242 -242 val_242 val_242 244 val_244 val_244 248 val_248 val_248 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 257 val_257 val_257 260 val_260 val_260 262 val_262 val_262 266 val_266 val_266 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 275 val_275 val_275 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -280 val_280 val_280 -280 val_280 val_280 -280 val_280 val_280 -280 val_280 val_280 -282 val_282 val_282 -282 val_282 val_282 -282 val_282 val_282 -282 val_282 val_282 +28 val_28 val_28 284 val_284 val_284 286 val_286 val_286 -288 val_288 val_288 -288 val_288 val_288 -288 val_288 val_288 -288 val_288 val_288 291 val_291 val_291 305 val_305 val_305 -307 val_307 val_307 -307 val_307 val_307 -307 val_307 val_307 -307 val_307 val_307 -309 val_309 val_309 -309 val_309 val_309 -309 val_309 val_309 -309 val_309 val_309 310 val_310 val_310 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -321 val_321 val_321 -321 val_321 val_321 -321 val_321 val_321 -321 val_321 val_321 323 val_323 val_323 -325 val_325 val_325 -325 val_325 val_325 -325 val_325 val_325 -325 val_325 val_325 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 +33 val_33 val_33 332 val_332 val_332 336 val_336 val_336 338 val_338 val_338 @@ -6380,147 +6483,44 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 345 val_345 val_345 356 val_356 val_356 365 val_365 val_365 -367 val_367 val_367 -367 val_367 val_367 -367 val_367 val_367 -367 val_367 val_367 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -374 val_374 val_374 -378 val_378 val_378 -389 val_389 val_389 -392 val_392 val_392 -394 val_394 val_394 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -400 val_400 val_400 -402 val_402 val_402 -404 val_404 val_404 -404 val_404 val_404 -404 val_404 val_404 -404 val_404 val_404 -406 val_406 
val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 +374 val_374 val_374 +378 val_378 val_378 +389 val_389 val_389 +392 val_392 val_392 +394 val_394 val_394 +4 val_4 val_4 +400 val_400 val_400 +402 val_402 val_402 411 val_411 val_411 -413 val_413 val_413 -413 val_413 val_413 -413 val_413 val_413 -413 val_413 val_413 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 419 val_419 val_419 -424 val_424 val_424 -424 val_424 val_424 -424 val_424 val_424 -424 val_424 val_424 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 435 val_435 val_435 437 val_437 val_437 -439 val_439 val_439 -439 val_439 val_439 -439 val_439 val_439 -439 val_439 val_439 +44 val_44 val_44 444 val_444 val_444 446 val_446 val_446 448 val_448 val_448 453 val_453 val_453 455 val_455 val_455 457 val_457 val_457 -459 val_459 val_459 -459 val_459 val_459 -459 val_459 val_459 -459 val_459 val_459 460 val_460 val_460 -462 val_462 val_462 -462 val_462 val_462 -462 val_462 val_462 -462 val_462 val_462 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 475 val_475 val_475 477 val_477 val_477 479 val_479 val_479 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 482 val_482 val_482 484 val_484 val_484 491 val_491 val_491 493 val_493 val_493 495 val_495 val_495 497 val_497 val_497 +53 val_53 val_53 +57 val_57 val_57 +64 val_64 val_64 +66 val_66 val_66 +77 val_77 val_77 +8 val_8 val_8 +80 val_80 val_80 +82 val_82 val_82 +86 val_86 val_86 PREHOOK: query: explain select a.key, a.value, b.value from tab2_n4 a join tab_part_n9 b on a.key = b.key and a.value = b.value PREHOOK: type: QUERY @@ -6633,329 +6633,432 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 0 val_0 val_0 0 val_0 val_0 0 val_0 val_0 -2 val_2 val_2 -4 val_4 val_4 -8 val_8 val_8 -11 val_11 val_11 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -19 val_19 val_19 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -28 val_28 val_28 -33 val_33 val_33 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -44 val_44 val_44 -51 val_51 val_51 
-51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -53 val_53 val_53 -57 val_57 val_57 -64 val_64 val_64 -66 val_66 val_66 -77 val_77 val_77 -80 val_80 val_80 -82 val_82 val_82 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -86 val_86 val_86 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -103 val_103 val_103 -103 val_103 val_103 -103 val_103 val_103 -103 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_103 val_103 +0 val_118 val_118 +0 val_118 val_118 +0 val_118 val_118 +0 val_118 val_118 +0 val_125 val_125 +0 val_125 val_125 +0 val_125 val_125 +0 val_125 val_125 +0 val_129 val_129 +0 val_129 val_129 +0 val_129 val_129 +0 val_129 val_129 +0 val_134 val_134 +0 val_134 val_134 +0 val_134 val_134 +0 val_134 val_134 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_138 val_138 +0 val_149 val_149 +0 val_149 val_149 +0 val_149 val_149 +0 val_149 val_149 +0 val_15 val_15 +0 val_15 val_15 +0 val_15 val_15 +0 val_15 val_15 +0 val_152 val_152 +0 val_152 val_152 +0 val_152 val_152 +0 val_152 val_152 +0 val_165 val_165 +0 val_165 val_165 +0 val_165 val_165 +0 val_165 val_165 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_167 val_167 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_169 val_169 +0 val_172 val_172 +0 val_172 val_172 +0 val_172 val_172 +0 val_172 val_172 +0 val_174 val_174 +0 val_174 val_174 +0 val_174 val_174 +0 val_174 val_174 +0 val_176 val_176 +0 val_176 val_176 +0 val_176 val_176 +0 val_176 val_176 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_187 val_187 +0 val_200 val_200 +0 val_200 val_200 +0 val_200 val_200 +0 val_200 val_200 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_208 val_208 +0 val_213 val_213 +0 val_213 val_213 +0 val_213 val_213 +0 val_213 val_213 +0 val_217 val_217 +0 val_217 val_217 +0 val_217 val_217 +0 val_217 val_217 +0 val_219 val_219 +0 val_219 val_219 +0 val_219 val_219 +0 val_219 val_219 +0 val_224 val_224 +0 val_224 val_224 +0 val_224 val_224 +0 val_224 val_224 +0 val_233 val_233 +0 val_233 val_233 +0 val_233 val_233 +0 val_233 val_233 +0 val_237 val_237 +0 val_237 val_237 +0 val_237 val_237 +0 val_237 val_237 +0 val_239 val_239 +0 val_239 val_239 +0 val_239 val_239 +0 val_239 val_239 +0 val_24 val_24 +0 val_24 val_24 +0 val_24 val_24 +0 val_24 val_24 +0 val_242 val_242 +0 val_242 val_242 +0 val_242 val_242 +0 val_242 val_242 +0 val_255 val_255 +0 val_255 val_255 +0 val_255 val_255 +0 val_255 val_255 +0 val_26 val_26 +0 val_26 val_26 +0 val_26 val_26 +0 val_26 val_26 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 val_273 val_273 +0 
val_273 val_273 +0 val_273 val_273 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_277 val_277 +0 val_280 val_280 +0 val_280 val_280 +0 val_280 val_280 +0 val_280 val_280 +0 val_282 val_282 +0 val_282 val_282 +0 val_282 val_282 +0 val_282 val_282 +0 val_288 val_288 +0 val_288 val_288 +0 val_288 val_288 +0 val_288 val_288 +0 val_307 val_307 +0 val_307 val_307 +0 val_307 val_307 +0 val_307 val_307 +0 val_309 val_309 +0 val_309 val_309 +0 val_309 val_309 +0 val_309 val_309 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_316 val_316 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_318 val_318 +0 val_321 val_321 +0 val_321 val_321 +0 val_321 val_321 +0 val_321 val_321 +0 val_325 val_325 +0 val_325 val_325 +0 val_325 val_325 +0 val_325 val_325 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_327 val_327 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_35 val_35 +0 val_367 val_367 +0 val_367 val_367 +0 val_367 val_367 +0 val_367 val_367 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_369 val_369 +0 val_37 val_37 +0 val_37 val_37 +0 val_37 val_37 +0 val_37 val_37 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_396 val_396 +0 val_404 val_404 +0 val_404 val_404 +0 val_404 val_404 +0 val_404 val_404 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_406 val_406 +0 val_413 val_413 +0 val_413 val_413 +0 val_413 val_413 +0 val_413 val_413 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_417 val_417 +0 val_42 val_42 +0 val_42 val_42 +0 val_42 val_42 +0 val_42 val_42 +0 val_424 val_424 +0 val_424 val_424 +0 val_424 val_424 +0 val_424 val_424 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_431 val_431 +0 val_439 val_439 +0 val_439 val_439 +0 val_439 val_439 +0 val_439 val_439 +0 val_459 val_459 +0 val_459 val_459 +0 val_459 val_459 +0 val_459 val_459 +0 val_462 val_462 +0 val_462 val_462 +0 val_462 val_462 +0 val_462 val_462 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_466 val_466 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 
+0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_468 val_468 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_480 val_480 +0 val_51 val_51 +0 val_51 val_51 +0 val_51 val_51 +0 val_51 val_51 +0 val_84 val_84 +0 val_84 val_84 +0 val_84 val_84 +0 val_84 val_84 +0 val_95 val_95 +0 val_95 val_95 +0 val_95 val_95 +0 val_95 val_95 +0 val_97 val_97 +0 val_97 val_97 +0 val_97 val_97 +0 val_97 val_97 105 val_105 val_105 +11 val_11 val_11 114 val_114 val_114 116 val_116 val_116 -118 val_118 val_118 -118 val_118 val_118 -118 val_118 val_118 -118 val_118 val_118 -125 val_125 val_125 -125 val_125 val_125 -125 val_125 val_125 -125 val_125 val_125 -129 val_129 val_129 -129 val_129 val_129 -129 val_129 val_129 -129 val_129 val_129 -134 val_134 val_134 -134 val_134 val_134 -134 val_134 val_134 -134 val_134 val_134 136 val_136 val_136 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 -138 val_138 val_138 143 val_143 val_143 145 val_145 val_145 -149 val_149 val_149 -149 val_149 val_149 -149 val_149 val_149 -149 val_149 val_149 150 val_150 val_150 -152 val_152 val_152 -152 val_152 val_152 -152 val_152 val_152 -152 val_152 val_152 156 val_156 val_156 158 val_158 val_158 163 val_163 val_163 -165 val_165 val_165 -165 val_165 val_165 -165 val_165 val_165 -165 val_165 val_165 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -167 val_167 val_167 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 -169 val_169 val_169 +17 val_17 val_17 170 val_170 val_170 -172 val_172 val_172 -172 val_172 val_172 -172 val_172 val_172 -172 val_172 val_172 -174 val_174 val_174 -174 val_174 val_174 -174 val_174 val_174 -174 val_174 val_174 -176 val_176 val_176 -176 val_176 val_176 -176 val_176 val_176 -176 val_176 val_176 178 val_178 val_178 181 val_181 val_181 183 val_183 val_183 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 -187 val_187 val_187 189 val_189 val_189 +19 val_19 val_19 190 val_190 val_190 192 val_192 val_192 -194 val_194 val_194 -196 val_196 val_196 -200 val_200 val_200 -200 val_200 val_200 -200 val_200 val_200 -200 val_200 val_200 +194 val_194 val_194 +196 val_196 val_196 +2 val_2 val_2 +20 val_20 val_20 202 val_202 val_202 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -208 val_208 val_208 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -217 val_217 val_217 -217 val_217 val_217 -217 val_217 val_217 -217 val_217 val_217 -219 val_219 val_219 -219 val_219 val_219 -219 val_219 val_219 -219 val_219 val_219 222 val_222 val_222 -224 val_224 val_224 -224 
val_224 val_224 -224 val_224 val_224 -224 val_224 val_224 226 val_226 val_226 228 val_228 val_228 -233 val_233 val_233 -233 val_233 val_233 -233 val_233 val_233 -233 val_233 val_233 235 val_235 val_235 -237 val_237 val_237 -237 val_237 val_237 -237 val_237 val_237 -237 val_237 val_237 -239 val_239 val_239 -239 val_239 val_239 -239 val_239 val_239 -239 val_239 val_239 -242 val_242 val_242 -242 val_242 val_242 -242 val_242 val_242 -242 val_242 val_242 244 val_244 val_244 248 val_248 val_248 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 257 val_257 val_257 260 val_260 val_260 262 val_262 val_262 266 val_266 val_266 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 275 val_275 val_275 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -277 val_277 val_277 -280 val_280 val_280 -280 val_280 val_280 -280 val_280 val_280 -280 val_280 val_280 -282 val_282 val_282 -282 val_282 val_282 -282 val_282 val_282 -282 val_282 val_282 +28 val_28 val_28 284 val_284 val_284 286 val_286 val_286 -288 val_288 val_288 -288 val_288 val_288 -288 val_288 val_288 -288 val_288 val_288 291 val_291 val_291 305 val_305 val_305 -307 val_307 val_307 -307 val_307 val_307 -307 val_307 val_307 -307 val_307 val_307 -309 val_309 val_309 -309 val_309 val_309 -309 val_309 val_309 -309 val_309 val_309 310 val_310 val_310 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -316 val_316 val_316 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -318 val_318 val_318 -321 val_321 val_321 -321 val_321 val_321 -321 val_321 val_321 -321 val_321 val_321 323 val_323 val_323 -325 val_325 val_325 -325 val_325 val_325 -325 val_325 val_325 -325 val_325 val_325 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 -327 val_327 val_327 +33 val_33 val_33 332 val_332 val_332 336 val_336 val_336 338 val_338 val_338 @@ -6963,144 +7066,41 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 345 val_345 val_345 356 val_356 val_356 365 val_365 val_365 -367 val_367 val_367 -367 val_367 val_367 -367 val_367 val_367 -367 val_367 val_367 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 -369 val_369 val_369 374 val_374 val_374 378 val_378 val_378 389 val_389 val_389 392 val_392 val_392 394 val_394 val_394 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 -396 val_396 val_396 +4 val_4 val_4 400 val_400 val_400 402 val_402 val_402 -404 val_404 val_404 -404 val_404 val_404 -404 val_404 val_404 -404 val_404 val_404 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 
val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 411 val_411 val_411 -413 val_413 val_413 -413 val_413 val_413 -413 val_413 val_413 -413 val_413 val_413 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 -417 val_417 val_417 419 val_419 val_419 -424 val_424 val_424 -424 val_424 val_424 -424 val_424 val_424 -424 val_424 val_424 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 -431 val_431 val_431 435 val_435 val_435 437 val_437 val_437 -439 val_439 val_439 -439 val_439 val_439 -439 val_439 val_439 -439 val_439 val_439 +44 val_44 val_44 444 val_444 val_444 446 val_446 val_446 448 val_448 val_448 453 val_453 val_453 455 val_455 val_455 457 val_457 val_457 -459 val_459 val_459 -459 val_459 val_459 -459 val_459 val_459 -459 val_459 val_459 460 val_460 val_460 -462 val_462 val_462 -462 val_462 val_462 -462 val_462 val_462 -462 val_462 val_462 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -466 val_466 val_466 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 -468 val_468 val_468 475 val_475 val_475 477 val_477 val_477 479 val_479 val_479 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 -480 val_480 val_480 482 val_482 val_482 484 val_484 val_484 491 val_491 val_491 493 val_493 val_493 495 val_495 val_495 497 val_497 val_497 +53 val_53 val_53 +57 val_57 val_57 +64 val_64 val_64 +66 val_66 val_66 +77 val_77 val_77 +8 val_8 val_8 +80 val_80 val_80 +82 val_82 val_82 +86 val_86 val_86 diff --git ql/src/test/results/clientpositive/spark/join18.q.out ql/src/test/results/clientpositive/spark/join18.q.out index 478b6a6..659a1c3 100644 --- ql/src/test/results/clientpositive/spark/join18.q.out +++ ql/src/test/results/clientpositive/spark/join18.q.out @@ -98,7 +98,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out index c7b18d7..75009b5 100644 --- ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out @@ -100,7 +100,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join32.q.out ql/src/test/results/clientpositive/spark/join32.q.out index 665cf67..62ef1a3 100644 --- ql/src/test/results/clientpositive/spark/join32.q.out +++ ql/src/test/results/clientpositive/spark/join32.q.out @@ -393,88 +393,88 @@ POSTHOOK: query: select * from 
dest_j1_n12 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1_n12 #### A masked pattern was here #### -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 150 val_150 val_150 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 66 val_66 val_66 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 diff --git ql/src/test/results/clientpositive/spark/join33.q.out ql/src/test/results/clientpositive/spark/join33.q.out index 13cd446..09198b0 100644 --- ql/src/test/results/clientpositive/spark/join33.q.out +++ ql/src/test/results/clientpositive/spark/join33.q.out @@ -393,88 +393,88 @@ POSTHOOK: query: select * from dest_j1_n7 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1_n7 #### A masked pattern was here #### -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 150 val_150 val_150 -213 val_213 val_213 -213 
val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 66 val_66 val_66 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 diff --git ql/src/test/results/clientpositive/spark/join6.q.out ql/src/test/results/clientpositive/spark/join6.q.out index 6075e5f..caa0849 100644 --- ql/src/test/results/clientpositive/spark/join6.q.out +++ ql/src/test/results/clientpositive/spark/join6.q.out @@ -92,7 +92,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join7.q.out ql/src/test/results/clientpositive/spark/join7.q.out index 9ebb3e3..423e518 100644 --- ql/src/test/results/clientpositive/spark/join7.q.out +++ ql/src/test/results/clientpositive/spark/join7.q.out @@ -122,7 +122,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git 
ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out index 26acd7e..749b926 100644 --- ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out @@ -1312,7 +1312,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 Left Outer Join 0 to 3 filter mappings: diff --git ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out index 1a3bf0d..d334ca4 100644 --- ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out @@ -895,7 +895,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col3 (type: string) 1 _col0 (type: string) @@ -911,7 +911,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1205,7 +1205,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1499,7 +1499,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index 288c38d..0047046 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -62,10 +62,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -117,10 +116,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -241,10 +240,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -296,10 +294,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -568,10 +565,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -623,10 +619,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -747,10 +743,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -802,10 +797,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1074,10 +1068,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - 
keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:float, 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -1129,10 +1122,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -1253,10 +1246,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized @@ -1308,10 +1300,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1627,10 +1618,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6] + valueColumns: 0:bigint, 1:bigint, 2:double, 3:double, 4:bigint, 5:double, 6:tinyint Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out index 5c79743..565d7d9 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out +++ 
ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out @@ -96,10 +96,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 0:double, 1:double, 2:bigint, 3:double, 4:tinyint, 5:int, 6:double, 7:double, 8:bigint, 9:bigint Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: tinyint), _col5 (type: int), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out index de7abe0..42b43a1 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out @@ -123,10 +123,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:double, 1:bigint, 2:string, 3:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8, 9, 10] + valueColumns: 4:bigint, 5:double, 6:double, 7:double, 8:bigint, 9:bigint, 10:double Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized @@ -189,10 +189,10 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:bigint, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] + valueColumns: 3:boolean, 11:double, 12:bigint, 4:bigint, 13:bigint, 14:double, 19:double, 15:double, 20:double, 22:double, 24:decimal(22,2), 9:bigint, 26:double, 25:double, 21:double, 27:double Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Reducer 3 diff --git 
ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out index 09b50c7..2b8491b 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out @@ -125,10 +125,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + valueColumns: 5:tinyint, 6:double, 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:bigint, 13:float, 14:tinyint Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized @@ -191,10 +191,9 @@ STAGE PLANS: sort order: +++++++++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string, 15:tinyint, 5:tinyint, 17:tinyint, 6:double, 20:double, 19:double, 21:float, 22:double, 23:double, 24:double, 27:decimal(7,3), 28:double, 25:double, 13:float, 31:double, 14:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out index ca43d13..64da4dc 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out @@ -125,10 +125,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:string, 1:float, 2:double, 3:timestamp, 4:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10, 11] + valueColumns: 5:double, 6:double, 7:bigint, 8:float, 9:double, 10:double, 11:bigint Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE value 
expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized @@ -191,10 +191,10 @@ STAGE PLANS: sort order: ++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:float, 2:double, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] + valueColumns: 4:boolean, 12:double, 14:double, 13:double, 15:float, 8:float, 19:float, 20:float, 21:double, 22:double, 11:bigint, 24:double, 25:double, 23:double, 29:double, 28:double, 31:double, 34:double Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out index 8497a7e..48f3a2f 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out @@ -121,10 +121,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4, 5, 6] + keyColumns: 0:float, 1:boolean, 2:double, 3:string, 4:tinyint, 5:int, 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + valueColumns: 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:double, 13:bigint, 14:double, 15:double, 16:bigint Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out index a85f4d3..1b8aa67 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out @@ -98,10 +98,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: 
VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:double, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6] + valueColumns: 3:bigint, 4:double, 5:double, 6:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out index 4ee2961..96ee32a 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out @@ -91,10 +91,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 4] + keyColumns: 3:bigint, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] + valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 14:double, 17:double, 19:double, 20:double, 22:decimal(11,4), 18:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out index 390b1df..176e9d4 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out @@ -100,10 +100,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 0:bigint, 1:bigint, 2:double, 3:double, 4:double, 5:bigint, 6:bigint, 7:tinyint, 8:double, 9:bigint Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out index 140f009..c940e06 100644 --- 
ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out @@ -105,10 +105,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + valueColumns: 0:double, 1:double, 2:bigint, 3:double, 4:double, 5:bigint, 6:double, 7:double, 8:bigint, 9:double, 10:bigint, 11:bigint, 12:double, 13:double Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out index adf6d60..a1f8e46 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out @@ -100,10 +100,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:bigint, 1:double, 2:double, 3:bigint, 4:tinyint Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out index 5f2e2ca..5e20242 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out @@ -93,10 +93,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:smallint, 1:bigint, 2:smallint, 3:bigint, 4:tinyint Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out index 67cd1fe..d99de6e 100644 --- 
ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out @@ -97,10 +97,9 @@ STAGE PLANS: sort order: +++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23] + keyColumns: 10:boolean, 3:bigint, 1:smallint, 0:tinyint, 8:timestamp, 6:string, 14:bigint, 15:int, 16:smallint, 17:tinyint, 19:int, 20:bigint, 18:int, 21:tinyint, 23:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out index 8de1d3b..9673921 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out @@ -93,10 +93,9 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22] + keyColumns: 8:timestamp, 5:double, 10:boolean, 6:string, 4:float, 13:double, 14:double, 15:double, 17:float, 19:double, 16:double, 18:float, 20:float, 22:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out index a85f4d3..1b8aa67 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out @@ -98,10 +98,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:double, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6] + valueColumns: 3:bigint, 4:double, 5:double, 6:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out index b2c0a64..66155f3 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out +++ 
ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out @@ -122,10 +122,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 5] + keyColumns: 0:tinyint, 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:smallint Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col2 (type: smallint) @@ -278,11 +278,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1, 2] + partitionColumns: 0:tinyint + valueColumns: 1:double, 2:bigint Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: double), _col2 (type: bigint) @@ -447,10 +447,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Execution mode: vectorized @@ -603,11 +602,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:tinyint, 1:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:tinyint Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -804,10 +802,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized @@ -861,10 +859,9 @@ STAGE PLANS: sort order: ++ Reduce 
Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:bigint, 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Reducer 3 diff --git ql/src/test/results/clientpositive/spark/semijoin.q.out ql/src/test/results/clientpositive/spark/semijoin.q.out index 5455b36..87d8d40 100644 --- ql/src/test/results/clientpositive/spark/semijoin.q.out +++ ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -1885,7 +1885,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Semi Join 1 to 2 keys: 0 key (type: int) @@ -2284,7 +2284,7 @@ STAGE PLANS: Join Operator condition map: Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out index 6eb23e0..d1610ba 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out @@ -281,7 +281,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -295,7 +295,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out index 183ba44..8175984 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out @@ -293,7 +293,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -539,7 +539,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out index ea44efc..8ce2eb2 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out @@ -253,7 +253,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -503,7 +503,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 
0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out index 11fb39d..80f38fa 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out @@ -252,7 +252,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -500,7 +500,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out index eda1afa..399edaf 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out @@ -375,7 +375,7 @@ STAGE PLANS: Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -685,7 +685,7 @@ STAGE PLANS: Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -861,7 +861,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -973,7 +973,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1082,8 +1082,8 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out index 9abc627..1b71cdb 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out @@ -375,7 +375,7 @@ STAGE PLANS: Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -685,7 +685,7 @@ STAGE PLANS: Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -861,7 +861,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -973,7 +973,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1082,8 +1082,8 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out index 610abab..1732927 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out +++ 
ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out @@ -631,7 +631,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index 352e74f..5954629 100644 --- ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -89,10 +89,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -372,10 +371,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -424,10 +422,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -575,10 +572,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -627,10 +623,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -856,11 +851,10 @@ STAGE PLANS: Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [6] + keyColumns: 6:int keyExpressions: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -909,11 +903,10 @@ STAGE PLANS: Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [4] + keyColumns: 4:int keyExpressions: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1060,11 +1053,10 @@ STAGE PLANS: Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [6] + keyColumns: 6:int keyExpressions: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1113,11 +1105,10 @@ STAGE PLANS: Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [4] + keyColumns: 4:int keyExpressions: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1342,11 +1333,10 @@ STAGE PLANS: Map-reduce partition columns: abs(((- UDFToLong(concat(CAST( day(CAST( _col0 AS DATE)) AS STRING), '0'))) + 10)) (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [6] + 
keyColumns: 6:bigint keyExpressions: FuncAbsLongToLong(col 5:bigint)(children: LongColAddLongScalar(col 6:bigint, val 10)(children: LongColUnaryMinus(col 5:bigint)(children: CastStringToLong(col 8:string)(children: StringGroupColConcatStringScalar(col 7:string, val 0)(children: CastLongToString(col 6:int)(children: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int) -> 7:string) -> 8:string) -> 5:bigint) -> 6:bigint) -> 5:bigint) -> 6:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1395,11 +1385,10 @@ STAGE PLANS: Map-reduce partition columns: abs(((- UDFToLong(concat(CAST( day(CAST( _col0 AS DATE)) AS STRING), '0'))) + 10)) (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [4] + keyColumns: 4:bigint keyExpressions: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 4:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 6:string)(children: StringGroupColConcatStringScalar(col 5:string, val 0)(children: CastLongToString(col 4:int)(children: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int) -> 5:string) -> 6:string) -> 3:bigint) -> 4:bigint) -> 3:bigint) -> 4:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1616,11 +1605,10 @@ STAGE PLANS: Map-reduce partition columns: CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [7] + keyColumns: 7:decimal(10,0) keyExpressions: CastLongToDecimal(col 6:smallint)(children: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int) -> 7:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1669,11 +1657,10 @@ STAGE PLANS: Map-reduce partition columns: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [5] + keyColumns: 5:decimal(10,0) keyExpressions: CastLongToDecimal(col 4:int)(children: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int) -> 5:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1955,10 +1942,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:string Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized @@ -2008,10 +1995,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2059,10 +2045,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2237,10 +2222,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:string Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized @@ -2290,10 +2275,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2341,10 +2325,9 @@ STAGE PLANS: Map-reduce partition columns: 
_col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2616,10 +2599,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 3] + keyColumns: 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2668,10 +2650,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 2] + keyColumns: 0:string, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2819,10 +2800,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 3] + keyColumns: 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2871,10 +2851,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 2] + keyColumns: 0:string, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3100,10 +3079,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3152,10 +3130,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3303,10 +3280,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3355,10 +3331,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3585,11 +3560,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(_col0) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [5] + keyColumns: 5:double keyExpressions: CastStringToDouble(col 3:string) -> 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3638,11 +3612,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(UDFToInteger((_col0 / 2.0D))) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [3] + keyColumns: 3:double keyExpressions: CastLongToDouble(col 4:int)(children: CastDoubleToLong(col 3:double)(children: DoubleColDivideDoubleScalar(col 0:double, val 2.0) -> 3:double) -> 4:int) -> 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: 
COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3859,11 +3832,10 @@ STAGE PLANS: Map-reduce partition columns: (UDFToDouble(_col0) * 2.0D) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [6] + keyColumns: 6:double keyExpressions: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3912,10 +3884,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4063,11 +4034,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(_col0) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [5] + keyColumns: 5:double keyExpressions: CastStringToDouble(col 3:string) -> 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4116,11 +4086,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(UDFToInteger((_col0 / 2.0D))) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [3] + keyColumns: 3:double keyExpressions: CastLongToDouble(col 4:int)(children: CastDoubleToLong(col 3:double)(children: DoubleColDivideDoubleScalar(col 0:double, val 2.0) -> 3:double) -> 4:int) -> 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4268,11 +4237,10 @@ STAGE PLANS: Map-reduce partition columns: (UDFToDouble(_col0) * 2.0D) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [6] + keyColumns: 6:double keyExpressions: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns 
IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4321,10 +4289,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4551,11 +4518,10 @@ STAGE PLANS: Map-reduce partition columns: CAST( (UDFToDouble(_col0) * 2.0D) AS STRING) (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [7] + keyColumns: 7:string keyExpressions: CastDoubleToString(col 6:double)(children: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double) -> 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4604,11 +4570,10 @@ STAGE PLANS: Map-reduce partition columns: CAST( _col0 AS STRING) (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [4] + keyColumns: 4:string keyExpressions: CastDoubleToString(col 0:double) -> 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4763,10 +4728,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4818,10 +4781,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: 
vectorized Map Vectorization: @@ -4939,10 +4901,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -5015,10 +4975,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] + valueColumns: 2:string, 3:string Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized @@ -5066,10 +5025,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 2] + valueColumns: 0:string, 2:string Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col2 (type: string) Execution mode: vectorized @@ -5316,10 +5274,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 3] + keyColumns: 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5368,10 +5325,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 2] + keyColumns: 0:string, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5588,10 +5544,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5640,10 +5595,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5848,10 +5802,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5891,10 +5844,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6092,10 +6044,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6144,10 +6095,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6410,10 +6360,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:string Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized @@ -6463,10 +6413,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6514,10 +6463,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6695,10 +6643,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6746,10 +6693,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [3] + keyColumns: 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:string Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -6796,10 +6743,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 
Vectorization: @@ -6973,10 +6919,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7028,10 +6973,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7224,10 +7168,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -7278,10 +7221,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7333,10 +7275,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7470,10 +7411,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num 
rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 7 Execution mode: vectorized @@ -7528,10 +7468,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -7610,10 +7549,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7665,10 +7603,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7861,10 +7798,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -7915,10 +7851,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7970,10 +7905,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + 
valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8109,10 +8043,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 7 Execution mode: vectorized @@ -8167,10 +8100,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -8250,10 +8182,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8305,10 +8236,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8505,10 +8435,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -8559,10 +8488,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8614,10 +8542,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8671,10 +8598,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Vectorization: @@ -8751,10 +8677,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 9 Execution mode: vectorized @@ -8809,10 +8734,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -8962,10 +8886,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -8985,10 +8912,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -9204,11 +9130,14 @@ STAGE PLANS: 0 day(CAST( _col0 AS DATE)) (type: int) 1 day(CAST( _col0 AS DATE)) (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [6] + bigTableKeyColumns: 6:int bigTableKeyExpressions: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -9228,10 +9157,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -9511,13 +9439,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -9529,10 +9459,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [3] + bigTableKeyColumns: 3:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 4 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE 
Column stats: NONE @@ -9552,10 +9485,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -9797,10 +9729,13 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col2 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2, 3] + bigTableKeyColumns: 2:string, 3:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -9820,10 +9755,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10039,10 +9973,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -10062,10 +9999,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10273,11 +10209,14 @@ STAGE PLANS: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(UDFToInteger((_col0 / 2.0D))) (type: double) Map Join Vectorization: - 
bigTableKeyColumnNums: [5] + bigTableKeyColumns: 5:double bigTableKeyExpressions: CastStringToDouble(col 3:string) -> 5:double + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -10297,10 +10236,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10507,11 +10445,14 @@ STAGE PLANS: 0 (UDFToDouble(_col0) * 2.0D) (type: double) 1 _col0 (type: double) Map Join Vectorization: - bigTableKeyColumnNums: [6] + bigTableKeyColumns: 6:double bigTableKeyExpressions: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -10531,10 +10472,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10683,10 +10623,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -10777,9 +10716,12 @@ STAGE PLANS: 0 1 Map Join Vectorization: + 
bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Reducer 4 Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE @@ -10799,10 +10741,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11013,10 +10954,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -11036,10 +10980,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11204,10 +11147,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -11227,10 +11172,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark 
IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11394,10 +11338,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED input vertices: 0 Map 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -11417,10 +11363,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11680,13 +11625,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -11698,10 +11645,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [3] + bigTableKeyColumns: 3:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 4 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE @@ -11721,10 +11671,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark 
IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11964,12 +11913,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col1 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [4] - smallTableMapping: [4] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -11981,10 +11933,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [4] + bigTableKeyColumns: 4:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 4 Statistics: Num rows: 1 Data size: 207 Basic stats: PARTIAL Column stats: NONE @@ -12004,10 +11959,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -12141,10 +12095,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12196,10 +12149,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 
0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12392,10 +12344,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -12446,10 +12397,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12501,10 +12451,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12640,10 +12589,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 7 Execution mode: vectorized @@ -12698,10 +12646,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/spark/union14.q.out ql/src/test/results/clientpositive/spark/union14.q.out index 6a95e4a..49d6cb1 100644 --- ql/src/test/results/clientpositive/spark/union14.q.out +++ ql/src/test/results/clientpositive/spark/union14.q.out @@ -126,20 +126,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 
#### A masked pattern was here #### -278 1 -273 1 + 10 128 1 -255 1 -tst1 1 146 1 -369 1 +150 1 213 1 -311 1 224 1 238 1 -150 1 - 10 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 406 1 66 1 -401 1 98 1 +tst1 1 diff --git ql/src/test/results/clientpositive/spark/union7.q.out ql/src/test/results/clientpositive/spark/union7.q.out index 549075c..8556f84 100644 --- ql/src/test/results/clientpositive/spark/union7.q.out +++ ql/src/test/results/clientpositive/spark/union7.q.out @@ -122,20 +122,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -278 1 -273 1 + 10 128 1 -255 1 -tst1 1 146 1 -369 1 +150 1 213 1 -311 1 224 1 238 1 -150 1 - 10 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 406 1 66 1 -401 1 98 1 +tst1 1 diff --git ql/src/test/results/clientpositive/spark/union_null.q.out ql/src/test/results/clientpositive/spark/union_null.q.out index d37adbb..696641c 100644 --- ql/src/test/results/clientpositive/spark/union_null.q.out +++ ql/src/test/results/clientpositive/spark/union_null.q.out @@ -24,16 +24,16 @@ POSTHOOK: query: select x from (select * from (select value as x from src order POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -val_0 -val_0 -val_0 -val_10 -val_100 NULL NULL NULL NULL NULL +val_0 +val_0 +val_0 +val_10 +val_100 PREHOOK: query: select * from (select * from (select cast(null as string) as N from src1 group by key)a UNION ALL select * from (select cast(null as string) as N from src1 group by key)b ) a PREHOOK: type: QUERY PREHOOK: Input: default@src1 diff --git ql/src/test/results/clientpositive/spark/union_view.q.out ql/src/test/results/clientpositive/spark/union_view.q.out index 591ebfa..97a5bef 100644 --- ql/src/test/results/clientpositive/spark/union_view.q.out +++ ql/src/test/results/clientpositive/spark/union_view.q.out @@ -483,10 +483,10 @@ STAGE PLANS: 86 val_86 2 86 val_86 3 86 val_86 3 -86 val_86 2 -86 val_86 2 86 val_86 3 86 val_86 3 +86 val_86 2 +86 val_86 2 86 val_86 1 STAGE DEPENDENCIES: Stage-1 is a root stage diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index e3d815b..6e33ead 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -99,10 +99,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 7:decimal(23,14), 8:decimal(33,14), 9:bigint Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized @@ -277,10 +277,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink 
Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:double, 6:double, 7:bigint, 8:decimal(23,14), 9:decimal(23,14), 10:decimal(33,14), 11:double, 12:double, 13:bigint Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint) Execution mode: vectorized @@ -489,10 +489,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:bigint, 6:decimal(16,0), 7:decimal(16,0), 8:decimal(26,0), 9:bigint Statistics: Num rows: 12289 Data size: 346462 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)), _col9 (type: bigint) Execution mode: vectorized @@ -686,10 +686,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:double, 6:double, 7:bigint, 8:decimal(16,0), 9:decimal(16,0), 10:decimal(26,0), 11:double, 12:double, 13:bigint Statistics: Num rows: 12289 Data size: 346462 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index 54f08fe..efab3f0 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ 
ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -137,13 +137,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 0 Map 1 @@ -319,13 +321,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -517,13 +521,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -687,14 +693,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:int, 1:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -850,14 +858,16 
@@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1022,14 +1032,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -1193,14 +1205,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1, 3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 1:string, 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1364,14 +1378,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + 
nonOuterSmallTableKeyMapping: [1] + projectedOutput: 0:string, 1:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col2, _col3 input vertices: 1 Map 2 @@ -1535,14 +1551,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 108d326..63d13fa 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -206,6 +206,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 3 @@ -220,6 +221,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 @@ -483,6 +485,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3, _col4 input vertices: 1 Map 3 @@ -497,6 +500,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 diff --git 
ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out index c36c9ec..a35a2df 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out @@ -151,15 +151,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableOuterKeyMapping: 1 -> 3 - bigTableRetainedColumnNums: [0, 1, 3] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 4] - smallTableMapping: [4] + outerSmallTableKeyMapping: 1 -> 3 + projectedOutput: 0:string, 1:int, 3:int, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -304,15 +305,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableOuterKeyMapping: 0 -> 4 - bigTableRetainedColumnNums: [0, 1, 4] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 4, 0, 1] - smallTableMapping: [3] + outerSmallTableKeyMapping: 0 -> 4 + projectedOutput: 3:string, 4:int, 0:int, 1:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index ecac4da..81eb1b9 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -311,15 +311,16 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableOuterKeyMapping: 2 -> 15 - bigTableRetainedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15] - bigTableValueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableValueColumns: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys 
IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - smallTableMapping: [13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24] + outerSmallTableKeyMapping: 2 -> 15 + projectedOutput: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean, 13:tinyint, 14:smallint, 15:int, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + smallTableValueMapping: 13:tinyint, 14:smallint, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 @@ -487,13 +488,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -796,13 +798,14 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 3 @@ -814,13 +817,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 4 @@ -841,10 +845,9 @@ STAGE PLANS: sort order: Reduce Sink 
Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index 92ad63e..86deb7c 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -370,13 +370,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -388,13 +389,14 @@ STAGE PLANS: 0 _col1 (type: bigint) 1 _col0 (type: bigint) Map Join Vectorization: - bigTableKeyColumnNums: [3] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 3:bigint + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 4 @@ -415,10 +417,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index bd88187..32219ac 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -62,10 +62,9 @@ STAGE PLANS: sort order: Reduce Sink 
Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -117,10 +116,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -241,10 +240,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -296,10 +294,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -568,10 +565,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -623,10 +619,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -747,10 +743,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -802,10 +797,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1074,10 +1068,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:float, 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -1129,10 +1122,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -1253,10 +1246,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized @@ -1308,10 +1300,9 @@ STAGE PLANS: sort order: + Reduce Sink 
Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1627,10 +1618,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6] + valueColumns: 0:bigint, 1:bigint, 2:double, 3:double, 4:bigint, 5:double, 6:tinyint Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_1.q.out ql/src/test/results/clientpositive/spark/vectorization_1.q.out index e6d6e64..e933da7 100644 --- ql/src/test/results/clientpositive/spark/vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_1.q.out @@ -96,10 +96,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 0:double, 1:double, 2:bigint, 3:double, 4:tinyint, 5:int, 6:double, 7:double, 8:bigint, 9:bigint Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: tinyint), _col5 (type: int), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_12.q.out ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 36f474d..7bfc4ce 100644 --- ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -123,10 +123,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:double, 1:bigint, 2:string, 3:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8, 9, 10] + valueColumns: 4:bigint, 5:double, 6:double, 7:double, 8:bigint, 9:bigint, 10:double Statistics: 
Num rows: 3754 Data size: 888395 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized @@ -189,10 +189,10 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:bigint, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] + valueColumns: 3:boolean, 11:double, 12:bigint, 4:bigint, 13:bigint, 14:double, 19:double, 15:double, 20:double, 22:double, 24:decimal(22,2), 9:bigint, 26:double, 25:double, 21:double, 27:double Statistics: Num rows: 1877 Data size: 444197 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Reducer 3 diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index 4d1fd3f..cc86bd4 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -125,10 +125,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + valueColumns: 5:tinyint, 6:double, 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:bigint, 13:float, 14:tinyint Statistics: Num rows: 2730 Data size: 646063 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized @@ -191,10 +191,9 @@ STAGE PLANS: sort order: +++++++++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string, 15:tinyint, 5:tinyint, 17:tinyint, 6:double, 20:double, 19:double, 21:float, 22:double, 23:double, 24:double, 27:decimal(7,3), 28:double, 25:double, 13:float, 31:double, 14:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1365 Data size: 323031 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 93c26ca..919fc85 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -125,10 +125,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:string, 1:float, 2:double, 3:timestamp, 4:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10, 11] + valueColumns: 5:double, 6:double, 7:bigint, 8:float, 9:double, 10:double, 11:bigint Statistics: Num rows: 606 Data size: 143411 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized @@ -191,10 +191,10 @@ STAGE PLANS: sort order: ++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:float, 2:double, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] + valueColumns: 4:boolean, 12:double, 14:double, 13:double, 15:float, 8:float, 19:float, 20:float, 21:double, 22:double, 11:bigint, 24:double, 25:double, 23:double, 29:double, 28:double, 31:double, 34:double Statistics: Num rows: 303 Data size: 71705 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 6672776..1cea297 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -121,10 +121,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4, 
5, 6] + keyColumns: 0:float, 1:boolean, 2:double, 3:string, 4:tinyint, 5:int, 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + valueColumns: 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:double, 13:bigint, 14:double, 15:double, 16:bigint Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index 3ceb751..adb0491 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -98,10 +98,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:double, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6] + valueColumns: 3:bigint, 4:double, 5:double, 6:double Statistics: Num rows: 4096 Data size: 969331 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_17.q.out ql/src/test/results/clientpositive/spark/vectorization_17.q.out index 6df2bd1..581136f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_17.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_17.q.out @@ -91,10 +91,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 4] + keyColumns: 3:bigint, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] + valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 14:double, 17:double, 19:double, 20:double, 22:decimal(11,4), 18:double Statistics: Num rows: 4096 Data size: 969331 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_2.q.out 
ql/src/test/results/clientpositive/spark/vectorization_2.q.out index b521c5b..641e61e 100644 --- ql/src/test/results/clientpositive/spark/vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_2.q.out @@ -100,10 +100,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 0:bigint, 1:bigint, 2:double, 3:double, 4:double, 5:bigint, 6:bigint, 7:tinyint, 8:double, 9:bigint Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_3.q.out ql/src/test/results/clientpositive/spark/vectorization_3.q.out index d329bb8..1baaaf6 100644 --- ql/src/test/results/clientpositive/spark/vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_3.q.out @@ -105,10 +105,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + valueColumns: 0:double, 1:double, 2:bigint, 3:double, 4:double, 5:bigint, 6:double, 7:double, 8:bigint, 9:double, 10:bigint, 11:bigint, 12:double, 13:double Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_4.q.out ql/src/test/results/clientpositive/spark/vectorization_4.q.out index 28d07dd..c67b97f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_4.q.out @@ -100,10 +100,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:bigint, 1:double, 2:double, 3:bigint, 4:tinyint Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized 
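The hunks above and below record a presentation change in EXPLAIN VECTORIZATION output: the positional lists keyColumnNums, valueColumnNums, and partitionColumnNums become keyColumns, valueColumns, and partitionColumns, with each entry printed as columnNum:type, and the empty "keyColumnNums: []" line is dropped for VectorReduceSinkEmptyKeyOperator. A minimal sketch of how a plan section like these q.out hunks is produced (illustrative only, not taken from the patch; it assumes the alltypesorc test table that the vectorization_*.q tests typically read):

    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reducesink.new.enabled=true;
    -- A global aggregate has an empty reduce-sink key, so the plan prints
    -- VectorReduceSinkEmptyKeyOperator with valueColumns only (e.g. 0:bigint, 1:bigint).
    EXPLAIN VECTORIZATION DETAIL
    SELECT count(*), sum(cint) FROM alltypesorc;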
diff --git ql/src/test/results/clientpositive/spark/vectorization_5.q.out ql/src/test/results/clientpositive/spark/vectorization_5.q.out index a5e4ade..da9a4d3 100644 --- ql/src/test/results/clientpositive/spark/vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_5.q.out @@ -93,10 +93,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:smallint, 1:bigint, 2:smallint, 3:bigint, 4:tinyint Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index 3ceb751..adb0491 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -98,10 +98,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:double, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6] + valueColumns: 3:bigint, 4:double, 5:double, 6:double Statistics: Num rows: 4096 Data size: 969331 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out index 2871c1a..40e13bb 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -52,10 +52,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 8296a65..bfad0b7 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -314,10 +314,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - 
keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized @@ -456,10 +455,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 131f692..fdf337d 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -102,6 +102,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 0c48310..2ef9e03 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -154,11 +154,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -406,10 +406,10 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 5] + valueColumns: 1:string, 
2:string, 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Execution mode: vectorized @@ -450,10 +450,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -658,11 +657,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -814,11 +813,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -1063,11 +1062,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -1281,11 +1280,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -1501,11 +1500,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized @@ -1546,10 +1545,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1726,10 +1724,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1761,11 +1758,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized @@ -2403,11 +2400,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -2651,11 +2648,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -2968,11 +2965,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -3186,11 +3183,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -3231,10 +3228,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 
Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3479,11 +3475,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -3714,11 +3710,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: double) Execution mode: vectorized @@ -3971,11 +3967,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -4460,10 +4456,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 1:string, 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized @@ -4805,10 +4801,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 
1:string, 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized @@ -5146,10 +5142,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -5464,10 +5460,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -5823,10 +5819,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -6152,10 +6148,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/stat_estimate_drill.q.out ql/src/test/results/clientpositive/stat_estimate_drill.q.out index cb02522..84bf86f 100644 --- ql/src/test/results/clientpositive/stat_estimate_drill.q.out +++ ql/src/test/results/clientpositive/stat_estimate_drill.q.out @@ -84,16 +84,16 @@ STAGE PLANS: TableScan alias: t3 filterExpr: (b) IN (2, 3) (type: boolean) - Statistics: Num rows: 1000/1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000/1 Data size: 8000 Basic stats: COMPLETE Column 
stats: COMPLETE Filter Operator predicate: (b) IN (2, 3) (type: boolean) - Statistics: Num rows: 200/200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200/1 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: b (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2/2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2/1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -149,16 +149,16 @@ STAGE PLANS: TableScan alias: t3 filterExpr: (a) IN (1, 2) (type: boolean) - Statistics: Num rows: 1000/1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000/1 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (a) IN (1, 2) (type: boolean) - Statistics: Num rows: 200/200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200/1 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: b (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 10/10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10/1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -214,16 +214,16 @@ STAGE PLANS: TableScan alias: t3 filterExpr: ((a = 1) or ((a = 2) and (b = 3))) (type: boolean) - Statistics: Num rows: 1000/1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000/1 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((a = 2) and (b = 3)) or (a = 1)) (type: boolean) - Statistics: Num rows: 110/110 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110/1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: b (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 10/10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10/1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -279,20 +279,20 @@ STAGE PLANS: TableScan alias: t3 filterExpr: (a = 1) (type: boolean) - Statistics: Num rows: 1000/1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000/1 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (a = 1) (type: boolean) - Statistics: Num rows: 100/100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100/1 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: b (type: int) outputColumnNames: _col0 - Statistics: Num rows: 100/100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100/1 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 10/10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10/1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -348,12 +348,12 @@ STAGE PLANS: 
TableScan alias: t3 filterExpr: ((a = 1) and (b = 2)) (type: boolean) - Statistics: Num rows: 1000/1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000/1 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((a = 1) and (b = 2)) (type: boolean) - Statistics: Num rows: 10/10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10/1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 10/10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10/1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(1) keys: true (type: boolean) @@ -415,7 +415,7 @@ STAGE PLANS: TableScan alias: t3 filterExpr: ((a = 1) and (b = 2) and (c = 3)) (type: boolean) - Statistics: Num rows: 1000/1000 Data size: 12000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000/1 Data size: 12000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((a = 1) and (b = 2) and (c = 3)) (type: boolean) Statistics: Num rows: 1/1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE @@ -482,16 +482,16 @@ STAGE PLANS: TableScan alias: t3 filterExpr: (struct(a,b)) IN (const struct(1,2), const struct(2,3), const struct(3,4)) (type: boolean) - Statistics: Num rows: 1000/1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000/1 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (struct(a,b)) IN (const struct(1,2), const struct(2,3), const struct(3,4)) (type: boolean) - Statistics: Num rows: 30/30 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 30/1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: b (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 10/3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10/1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + diff --git ql/src/test/results/clientpositive/stat_estimate_related_col.q.out ql/src/test/results/clientpositive/stat_estimate_related_col.q.out index 2a2b7a8..ac8b930 100644 --- ql/src/test/results/clientpositive/stat_estimate_related_col.q.out +++ ql/src/test/results/clientpositive/stat_estimate_related_col.q.out @@ -89,16 +89,16 @@ STAGE PLANS: TableScan alias: t8 filterExpr: (b) IN (2, 3) (type: boolean) - Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40/1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (b) IN (2, 3) (type: boolean) - Statistics: Num rows: 16/16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16/1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: b (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2/2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2/1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -154,14 +154,14 @@ STAGE PLANS: TableScan alias: t8 filterExpr: (b = 2) (type: boolean) - Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 40/1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (b = 2) (type: boolean) - Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8/1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: a - Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8/1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: 2 (type: int) @@ -223,7 +223,7 @@ STAGE PLANS: TableScan alias: t1 filterExpr: ((2 = b) and (b = 2)) (type: boolean) - Statistics: Num rows: 5/5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5/1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((2 = b) and (b = 2)) (type: boolean) Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -292,7 +292,7 @@ STAGE PLANS: TableScan alias: t1 filterExpr: ((b) IN (2, 3) and (b = 2)) (type: boolean) - Statistics: Num rows: 5/5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5/1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((b = 2) and (b) IN (2, 3)) (type: boolean) Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -361,14 +361,14 @@ STAGE PLANS: TableScan alias: t8 filterExpr: ((b) IN (2, 3) and (b = 2)) (type: boolean) - Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40/1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((b = 2) and (b) IN (2, 3)) (type: boolean) - Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8/1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: a - Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8/1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: 2 (type: int) @@ -524,14 +524,14 @@ STAGE PLANS: TableScan alias: t8 filterExpr: ((b) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50) and (b = 2) and (2 = b)) (type: boolean) - Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40/1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((2 = b) and (b = 2) and (b) IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) (type: boolean) - Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8/1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: a - Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8/1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: 2 (type: 
int) @@ -593,14 +593,14 @@ STAGE PLANS: TableScan alias: t8 filterExpr: ((b = 2) and ((b = 1) or (b = 2))) (type: boolean) - Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40/1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((b = 1) or (b = 2)) and (b = 2)) (type: boolean) - Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8/1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int) outputColumnNames: a - Statistics: Num rows: 8/8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8/1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(a) keys: 2 (type: int) @@ -662,7 +662,7 @@ STAGE PLANS: TableScan alias: t8 filterExpr: ((b = 2) and ((b = 1) or (b = 2)) and ((b = 1) or (b = 3))) (type: boolean) - Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40/1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((b = 1) or (b = 2)) and ((b = 1) or (b = 3)) and (b = 2)) (type: boolean) Statistics: Num rows: 8/0 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -747,7 +747,7 @@ STAGE PLANS: TableScan alias: t8 filterExpr: ((b = 2) and ((b = 1) or (b = 2)) and (a = 3) and ((a = 3) or (a = 4))) (type: boolean) - Statistics: Num rows: 40/40 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40/1 Data size: 320 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((a = 3) or (a = 4)) and ((b = 1) or (b = 2)) and (a = 3) and (b = 2)) (type: boolean) Statistics: Num rows: 2/0 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index dee97db..9ca6453 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -353,9 +353,9 @@ Stage-3 Map 1 vectorized File Output Operator [FS_4] table:{"name:":"default.src_autho_test_n4"} - Select Operator [SEL_3] (rows=500/500 width=178) + Select Operator [SEL_3] (rows=500/1 width=178) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500/500 width=178) + TableScan [TS_0] (rows=500/1 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] Stage-0 Move Operator @@ -612,15 +612,15 @@ Stage-0 Stage-1 Reducer 2 vectorized File Output Operator [FS_10] - Limit [LIM_9] (rows=5/5 width=178) + Limit [LIM_9] (rows=5/3 width=178) Number of rows:5 - Select Operator [SEL_8] (rows=500/5 width=178) + Select Operator [SEL_8] (rows=500/3 width=178) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_7] - Select Operator [SEL_6] (rows=500/500 width=178) + Select Operator [SEL_6] (rows=500/1 width=178) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500/500 width=178) + TableScan [TS_0] (rows=500/1 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: create table orc_merge5_n1 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc @@ -679,7 +679,7 @@ Stage-3 Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_12] (rows=1/3 width=352) predicate:(userid <= 13L) - TableScan [TS_0] (rows=1/15000 
width=352) + TableScan [TS_0] (rows=1/15 width=352) default@orc_merge5_n1,orc_merge5_n1,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] PARTITION_ONLY_SHUFFLE [RS_16] Select Operator [SEL_15] (rows=1/3 width=352) @@ -847,23 +847,23 @@ Stage-0 Stage-1 Map 2 vectorized File Output Operator [FS_34] - Select Operator [SEL_33] (rows=399/480 width=186) + Select Operator [SEL_33] (rows=399/50 width=186) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_32] (rows=399/480 width=186) + Map Join Operator [MAPJOIN_32] (rows=399/50 width=186) BucketMapJoin:true,Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] vectorized MULTICAST [RS_29] PartitionCols:_col0 - Select Operator [SEL_28] (rows=242/242 width=95) + Select Operator [SEL_28] (rows=242/4 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_27] (rows=242/242 width=95) + Filter Operator [FIL_27] (rows=242/4 width=95) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=95) + TableScan [TS_0] (rows=242/4 width=95) default@tab_n2,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_31] (rows=500/500 width=95) + <-Select Operator [SEL_31] (rows=500/4 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_30] (rows=500/500 width=95) + Filter Operator [FIL_30] (rows=500/4 width=95) predicate:key is not null - TableScan [TS_3] (rows=500/500 width=95) + TableScan [TS_3] (rows=500/4 width=95) default@tab_part_n3,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] diff --git ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out index 7d06030..a6acaf0 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out @@ -338,7 +338,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_10] Map Join Operator [MAPJOIN_27] (rows=1501/10 width=236) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col2 @@ -433,7 +433,7 @@ Stage-0 <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_10] Map Join Operator [MAPJOIN_28] (rows=1501/10 width=236) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true <-Map 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 @@ -531,7 +531,7 @@ Stage-0 SHUFFLE [RS_10] PartitionCols:_col0 Map Join Operator [MAPJOIN_30] (rows=1501/10 width=236) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0"] + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0"] <-Map 5 [CUSTOM_SIMPLE_EDGE] 
PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 diff --git ql/src/test/results/clientpositive/union_null.q.out ql/src/test/results/clientpositive/union_null.q.out index 00bd9d9..696641c 100644 --- ql/src/test/results/clientpositive/union_null.q.out +++ ql/src/test/results/clientpositive/union_null.q.out @@ -6,16 +6,16 @@ POSTHOOK: query: select x from (select * from (select value as x from src order POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -val_0 -val_0 -val_0 -val_10 -val_100 NULL NULL NULL NULL NULL +val_0 +val_0 +val_0 +val_10 +val_100 PREHOOK: query: select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select cast(NULL as string) as x from src limit 5)b )a PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -82,5 +82,5 @@ POSTHOOK: query: select null as c1 UNION ALL select 1 as c1 POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -NULL 1 +NULL diff --git ql/src/test/results/clientpositive/union_offcbo.q.out ql/src/test/results/clientpositive/union_offcbo.q.out index 94d197f..90acc19 100644 --- ql/src/test/results/clientpositive/union_offcbo.q.out +++ ql/src/test/results/clientpositive/union_offcbo.q.out @@ -281,7 +281,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -373,7 +373,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -969,7 +969,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -1075,7 +1075,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -1322,7 +1322,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -1426,7 +1426,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out index 7f2f17f..8192435 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out @@ -65,6 +65,7 @@ POSTHOOK: Output: default@tjoin1 POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE PREHOOK: type: QUERY PREHOOK: Input: default@tjoin2stage @@ -76,12 +77,14 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE 
[(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain vectorization expression +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -166,15 +169,21 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + STAGE DEPENDENCIES: Stage-4 is a root stage Stage-3 depends on stages: Stage-4 @@ -255,15 +264,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -305,6 +316,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -359,6 +371,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Local Work: Map Reduce Local Work @@ -378,15 +396,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -428,6 +448,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -482,6 +503,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Local Work: Map Reduce Local Work @@ -501,15 +528,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -551,6 +580,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -605,6 +635,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Local Work: Map Reduce Local Work @@ -624,15 +660,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -674,6 +712,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 32 Basic stats: 
COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -728,6 +767,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Local Work: Map Reduce Local Work @@ -747,6 +792,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/vectorized_join46_mr.q.out ql/src/test/results/clientpositive/vectorized_join46_mr.q.out new file mode 100644 index 0000000..9df8c28 --- /dev/null +++ ql/src/test/results/clientpositive/vectorized_join46_mr.q.out @@ -0,0 +1,2095 @@ +PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), + (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test1 +POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), + (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test1 +POSTHOOK: Lineage: test1.col_1 SCRIPT [] +POSTHOOK: Lineage: test1.key SCRIPT [] +POSTHOOK: Lineage: test1.value SCRIPT [] +col1 col2 col3 +PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2 +POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2 +PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), + (104, 3, 'Fli'), (105, NULL, 'None') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test2 +POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), + (104, 3, 'Fli'), (105, NULL, 'None') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test2 +POSTHOOK: Lineage: test2.col_2 SCRIPT [] +POSTHOOK: Lineage: test2.key SCRIPT [] +POSTHOOK: Lineage: test2.value SCRIPT [] +col1 col2 col3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map 
Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 
+98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + filterExpr: key BETWEEN 100 AND 102 (type: boolean) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key BETWEEN 100 AND 102 (type: boolean) + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized 
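-- The hunks above switch these q files from EXPLAIN VECTORIZATION EXPRESSION to
-- EXPLAIN VECTORIZATION DETAIL, which additionally prints the TableScan's
-- vectorizationSchemaColumns and the per-map rowBatchContext (data column count,
-- included columns, partition column count, scratch column types). The LEFT OUTER
-- JOIN plan directly above also shows how a mixed ON clause is split for an outer
-- MapJoin: the predicate on the small side (test2.key BETWEEN 100 AND 102) is
-- pushed into a Filter Operator ahead of the HashTable Sink, while the predicate on
-- the preserved side survives as "filter predicates" on input 0, which is what
-- selects VectorMapJoinOuterFilteredOperator instead of VectorMapJoinOperator.
-- A minimal sketch of reproducing this plan shape; the SET statements are an
-- assumption mirroring the conditions listed under nativeConditionsMet, not taken
-- from this q file:
SET hive.auto.convert.join=true;
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.mapjoin.native.enabled=true;
SET hive.mapjoin.optimized.hashtable=true;

EXPLAIN VECTORIZATION DETAIL
SELECT *
FROM test1 LEFT OUTER JOIN test2
ON (test1.value = test2.value
    AND test1.key BETWEEN 100 AND 102
    AND test2.key BETWEEN 100 AND 102);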
+ Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL +Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + filterExpr: key BETWEEN 100 AND 102 (type: boolean) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key BETWEEN 100 AND 102 (type: boolean) + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map 
Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 + 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[11][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 + 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int, col 
2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL +Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + 
Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL None NULL NULL NULL +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: 
Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink 
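-- The RIGHT OUTER JOIN plans in this stretch differ only in the shape of the ON
-- clause, and that difference decides whether a usable equi-key survives. With the
-- equality OR'ed against a BETWEEN (the plan just above), no key remains: the
-- MapJoin runs with empty key lists, the whole clause becomes residual filter
-- predicates, the operator stays row-mode (notVectorizedReason: MAPJOIN operator:
-- Non-equi joins not supported), and the compiler emits the cross-product warning.
-- With the BETWEENs wrapped under AND (a plan further below), the join keeps keys
-- 0/1 = _col1 and only the BETWEENs stay residual. Both queries are taken from this
-- file, shown side by side for comparison:
EXPLAIN VECTORIZATION DETAIL
SELECT *
FROM test1 RIGHT OUTER JOIN test2
ON (test1.value = test2.value
    OR test1.key BETWEEN 100 AND 102);           -- unkeyed: cross product + residual

EXPLAIN VECTORIZATION DETAIL
SELECT *
FROM test1 RIGHT OUTER JOIN test2
ON (test1.value = test2.value
    AND (test1.key BETWEEN 100 AND 102
         OR test2.key BETWEEN 100 AND 102));     -- keyed on value: only BETWEENs residual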
+ +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor 
Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + 
Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false 
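-- On the MR engine both vectorization checks in these FULL OUTER JOIN plans fail by
-- design: the map side has two TableScanOperators feeding one shuffle join
-- ("Vectorized map work only works with 1 TableScanOperator"), and reduce-side
-- vectorization is implemented only for Tez and Spark. A hedged illustration of the
-- settings the enableConditions above refer to; switching engines is an assumption
-- for illustration only, since this golden file deliberately records the MR plans:
SET hive.vectorized.execution.reduce.enabled=true;   -- already true per enableConditionsMet
SET hive.execution.engine=tez;                       -- would satisfy "mr IN [tez, spark]"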
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Full Outer Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)}
+          Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM test1 FULL OUTER JOIN test2
+ON (test1.value=test2.value
+    OR test2.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 FULL OUTER JOIN test2
+ON (test1.value=test2.value
+    OR test2.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+test1.key	test1.value	test1.col_1	test2.key	test2.value	test2.col_2
+100	1	Bob	102	2	Del
+101	2	Car	102	2	Del
+101	2	Car	103	2	Ema
+98	NULL	None	102	2	Del
+99	0	Alice	102	2	Del
+99	2	Mat	102	2	Del
+99	2	Mat	103	2	Ema
+NULL	NULL	NULL	104	3	Fli
+NULL	NULL	NULL	105	NULL	None
+NULL	NULL	None	102	2	Del
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT *
+FROM test1 FULL OUTER JOIN test2
+ON (test1.value=test2.value
+    AND (test1.key between 100 and 102
+         OR test2.key between 100 and 102))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT *
+FROM test1 FULL OUTER JOIN test2
+ON (test1.value=test2.value
+    AND (test1.key between 100 and 102
+         OR test2.key between 100 and 102))
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test1
+            Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int), col_1 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int)
+                sort order: +
+                Map-reduce partition columns: _col1 (type: int)
+                Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: int), _col2 (type: string)
+          TableScan
+            alias: test2
+            Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int), col_2 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int)
+                sort order: +
+                Map-reduce partition columns: _col1 (type: int)
+                Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: int), _col2 (type: string)
+      Map Vectorization:
+          enabled: false
+          enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Full Outer Join 0 to 1
+          keys:
+            0 _col1 (type: int)
+            1 _col1 (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
+          Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT *
+FROM test1 FULL OUTER JOIN test2
+ON (test1.value=test2.value
+    AND (test1.key between 100 and 102
+         OR test2.key between 100 and 102))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 FULL OUTER JOIN test2
+ON (test1.value=test2.value
+    AND (test1.key between 100 and 102
+         OR test2.key between 100 and 102))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+test1.key	test1.value	test1.col_1	test2.key	test2.value	test2.col_2
+100	1	Bob	NULL	NULL	NULL
+101	2	Car	102	2	Del
+101	2	Car	103	2	Ema
+98	NULL	None	NULL	NULL	NULL
+99	0	Alice	NULL	NULL	NULL
+99	2	Mat	102	2	Del
+NULL	NULL	NULL	104	3	Fli
+NULL	NULL	NULL	105	NULL	None
+NULL	NULL	None	NULL	NULL	NULL
diff --git serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
index c6b77ed..7e10af7 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
@@ -599,8 +599,6 @@ public static Object deserializeReadComplexType(DeserializeRead deserializeRead,
     return getComplexField(deserializeRead, typeInfo);
   }
 
-  static int fake = 0;
-
   private static Object getComplexField(DeserializeRead deserializeRead,
       TypeInfo typeInfo) throws IOException {
     switch (typeInfo.getCategory()) {
diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
index dcbba7a..3095114 100644
--- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
+++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
@@ -38,8 +38,6 @@
 
   private static int TEST_COUNT = 5000;
 
-  private static int fake = 0;
-
   @Test
   public void testSaveAndRetrieve() throws Exception {
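
Note on the golden files above: when the FULL OUTER JOIN's ON clause is a disjunction, no equality conjunct can serve as the join key, so the planner falls back to a cross-product shuffle join (hence the warnings) and evaluates the entire ON clause as a residual filter predicate; when the equality is ANDed with the disjunction, test1.value = test2.value becomes the shuffle key (_col1 on both sides) and only the BETWEEN disjunction survives as residual. The HiveQL sketch below reproduces the scenario end to end. The DDL and row values are inferred from the Select Operator column lists and the join output above, not taken from the actual .q file, so treat them as assumptions.

-- Hypothetical fixture reconstruction (names/types inferred from the
-- EXPLAIN output above; the actual .q setup may differ).
CREATE TABLE test1 (key INT, value INT, col_1 STRING);
CREATE TABLE test2 (key INT, value INT, col_2 STRING);

-- Row values read back from the join results above; older Hive versions
-- may need CAST(NULL AS INT) instead of bare NULL in VALUES clauses.
INSERT INTO test1 VALUES
  (98, NULL, 'None'), (99, 0, 'Alice'), (99, 2, 'Mat'),
  (100, 1, 'Bob'), (101, 2, 'Car'), (NULL, NULL, 'None');
INSERT INTO test2 VALUES
  (102, 2, 'Del'), (103, 2, 'Ema'), (104, 3, 'Fli'), (105, NULL, 'None');

-- OR leaves no usable equality key: planned as a cross-product shuffle
-- join, with the whole ON clause as a residual filter (warning above).
SELECT *
FROM test1 FULL OUTER JOIN test2
ON (test1.value = test2.value OR test2.key BETWEEN 100 AND 102);

-- AND keeps value = value as the shuffle key; only the BETWEEN
-- disjunction appears under "residual filter predicates" in the plan.
SELECT *
FROM test1 FULL OUTER JOIN test2
ON (test1.value = test2.value
    AND (test1.key BETWEEN 100 AND 102 OR test2.key BETWEEN 100 AND 102));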